Objective: 1) use avatar to create a duplicated data set and compare the performance of original and avatar dataset in a cox model. 2) Evalaute the variability for a given avatar with bootstraping 3) Evaluate variabiltiy between different Avatar using different seed 4) Evaluate effect of data augmentation (X4) 5) Evaluate Survtvae in comaprison to Avatar 6) Evaluate Survctgan in comaprison to Avatar
In this anlaysis, the covariates selected after bootstraping bootstepAIC each synhtetic dataset are used for the analyses of inter dataset variability
The difference with v3 is that here with fit the multivariate model for the evaluation of knn=x instead of fitting only for haplotype and maping on knn value : that means that we have ot run separately for the different knn value the script with hteir own mulitvariate models
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.1.1 ──
## ✔ broom 1.0.5 ✔ rsample 1.2.0
## ✔ dials 1.2.1 ✔ tune 1.2.0
## ✔ infer 1.0.6 ✔ workflows 1.1.4
## ✔ modeldata 1.3.0 ✔ workflowsets 1.0.1
## ✔ parsnip 1.2.0 ✔ yardstick 1.3.1
## ✔ recipes 1.0.10
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ recipes::fixed() masks stringr::fixed()
## ✖ dplyr::lag() masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step() masks stats::step()
## • Use suppressPackageStartupMessages() to eliminate package startup messages
library(FNN)
library(survival)
library(survminer)
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
##
## The following object is masked from 'package:survival':
##
## myeloma
library(corrplot)
## corrplot 0.92 loaded
library(ggcorrplot)
library(DataExplorer)
library(patchwork)
library(tableone)
library(boot)
##
## Attaching package: 'boot'
##
## The following object is masked from 'package:survival':
##
## aml
library(bootStepAIC)
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:patchwork':
##
## area
##
## The following object is masked from 'package:dplyr':
##
## select
library(conflicted)
conflict_prefer("select", "dplyr")
## [conflicted] Will prefer dplyr::select over any other package.
conflict_prefer("filter", "dplyr")
## [conflicted] Will prefer dplyr::filter over any other package.
library(readr)
original <- read_delim("td_dirc_perte_greffon.txt",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE) %>%
mutate(rejet_aigu = as.factor(rejet_aigu)
# age_r = scale(age_r),
# age_d = scale(age_d),
# TIF = scale(TIF),
) %>%
mutate_if(is.character, factor) %>%
select(-id) %>%
select(haplotype:delai_event)
## Rows: 253 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (9): id, haplotype, cyp3A5D, sexe_r, sexe_d, CYP3A4_1B, MDR1_C1236T, MDR...
## dbl (7): age_r, age_d, rejet_aigu, TIF, event, delai_event, pente_creat
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summary(original)
## haplotype cyp3A5D age_r sexe_r age_d sexe_d
## autre: 97 Es : 42 Min. :19.00 F: 97 Min. :12.00 F: 79
## het :123 NEs:211 1st Qu.:44.00 M:156 1st Qu.:25.00 M:174
## hom : 33 Median :55.00 Median :40.00
## Mean :53.84 Mean :38.49
## 3rd Qu.:64.00 3rd Qu.:49.00
## Max. :78.00 Max. :73.00
## rejet_aigu TIF event delai_event
## 0:172 Min. : 303 Min. :0.00000 Min. : 0.680
## 1: 81 1st Qu.: 975 1st Qu.:0.00000 1st Qu.: 2.920
## Median :1153 Median :0.00000 Median : 5.340
## Mean :1199 Mean :0.08696 Mean : 6.044
## 3rd Qu.:1368 3rd Qu.:0.00000 3rd Qu.: 8.700
## Max. :2580 Max. :1.00000 Max. :15.830
str(original)
## tibble [253 × 10] (S3: tbl_df/tbl/data.frame)
## $ haplotype : Factor w/ 3 levels "autre","het",..: 2 2 3 2 2 2 3 3 1 2 ...
## $ cyp3A5D : Factor w/ 2 levels "Es","NEs": 2 2 1 1 2 1 1 2 2 2 ...
## $ age_r : num [1:253] 74 64 47 42 53 62 68 77 52 58 ...
## $ sexe_r : Factor w/ 2 levels "F","M": 1 2 2 2 2 1 1 2 2 1 ...
## $ age_d : num [1:253] 52 41 23 44 30 16 46 58 26 40 ...
## $ sexe_d : Factor w/ 2 levels "F","M": 2 2 2 2 2 1 1 1 2 2 ...
## $ rejet_aigu : Factor w/ 2 levels "0","1": 2 2 1 2 2 2 1 2 1 1 ...
## $ TIF : num [1:253] 1020 825 1020 827 1245 ...
## $ event : num [1:253] 1 1 1 1 1 1 1 1 1 1 ...
## $ delai_event: num [1:253] 10.21 5.78 3.91 3.27 6.79 ...
original1 <- original %>% mutate_if(is.factor, as.numeric)# converti en factor par ordre alhpahbetique+++
write.csv(original1, file = "original1.csv")
In this code we will vary the seed for avatar with a fix number of knn (here=5) and bootsptrap the Cox model The goal is to extract the variability of HR for a given dataset and between different seed to obtain the overall uncertainty
We launch a single Avatar with knn=5 and a given seed and we compare the results to the original data
data_normalized <- scale(original1)
pca <- prcomp(data_normalized, scale. = FALSE)# pour selecitonner le nombre de cp rank. = 3
# Number of neighbors
k <- 5 # Adjust this based on your requirement
algorithm
pca_transformed_data <- pca$x
knn_result <- get.knn(pca_transformed_data, k)
generate_avatar_weights <- function(knn_result, pca_transformed_data, k) {
n <- nrow(pca_transformed_data)
avatar_weights <- matrix(nrow = n, ncol = k)
for (i in 1:n) {
# Step 1: Inverse of Distances
distances <- sqrt(rowSums((pca_transformed_data[knn_result$nn.index[i, ], ] - pca_transformed_data[i, ])^2))
inverse_distances <- 1 / distances
# Step 2: Random Weights
random_weights <- rexp(k, rate = 1)
# Step 3: Contribution Factors
shuffled_indices <- sample(k)
contribution_factors <- 1 / (2^shuffled_indices)
# Step 4: Calculate Weights
weights <- inverse_distances * random_weights * contribution_factors
# Step 5: Normalize Weights
normalized_weights <- weights / sum(weights)
avatar_weights[i, ] <- normalized_weights
}
return(avatar_weights)
}
# Generate avatar weights
set.seed(12)
avatar_weights <- generate_avatar_weights(knn_result, pca_transformed_data, k)
Generation of avatar in the latent space
# Assuming pca_result, avatar_weights, knn_result$nn.index, and pca_transformed_data are already defined
# Function to generate avatars in PCA space based on weights
generate_avatars_pca_space <- function(pca_transformed_data, knn_indices, weights) {
n <- nrow(pca_transformed_data)
avatars_pca <- matrix(nrow = n, ncol = ncol(pca_transformed_data))
for (i in 1:n) {
weighted_avatars <- pca_transformed_data[knn_indices[i, ], ] * weights[i, ]
avatars_pca[i, ] <- colSums(weighted_avatars)
}
return(avatars_pca)
}
# Generate avatars in PCA space
avatars_pca_space <- generate_avatars_pca_space(pca_transformed_data, knn_result$nn.index, avatar_weights)
Return to the initial scale
# Assuming 'aids_pca' is the PCA object and 'avatars_pca_space' contains the avatars in PCA space
# Inverse PCA transformation
inverse_pca <- function(pca_object, pca_data) {
return(pca_data %*% t(pca_object$rotation) + matrix(pca_object$center, nrow = nrow(pca_data), ncol = ncol(pca_object$rotation), byrow = TRUE))
}
avatars_original_scale <- inverse_pca(pca, avatars_pca_space)
# Assuming 'aids_data_normalized' contains the scaling attributes of the original data
# Inverse normalization (if the original data was normalized)
avatars_rescaled <- scale(avatars_original_scale, center = FALSE, scale = 1/attr(data_normalized, "scaled:scale"))
avatars_rescaled <- sweep(avatars_rescaled, 2, attr(data_normalized, "scaled:center"), "+")
Transform into tibble
avatars_tibble_knn5 <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
)
avatars_tibble_factor_knn5 <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
) %>%
mutate(haplotype = as.factor(haplotype),
cyp3A5D = as.factor(cyp3A5D),
sexe_r = as.factor(sexe_r),
sexe_d = as.factor(sexe_d),
# CYP3A4_1B = as.factor(CYP3A4_1B),
# MDR1_C1236T = as.factor(MDR1_C1236T),
# MDR1_G2677T = as.factor(MDR1_G2677T),
# MDR1_C3435T = as.factor(MDR1_C3435T),
rejet_aigu = as.factor(rejet_aigu))
Plot of the synthetic and original in the latent space
# Combine original and synthetic data for visualization
combined_data <- rbind(
original1 %>% mutate(DataType = 'Original'),
avatars_tibble_knn5 %>% mutate(DataType = 'Synthetic')
)
# Perform PCA on combined data
combined_data_normalized <- scale(combined_data[, -which(names(combined_data) %in% c("DataType", "id"))])
combined_pca <- prcomp(combined_data_normalized, scale. = FALSE)
# Extract the first two principal components
combined_pca_data <- data.frame(combined_pca$x[, 1:2])
combined_pca_data$DataType <- combined_data$DataType
# Plot PCA with color differentiation
ggplot(combined_pca_data, aes(x = PC1, y = PC2, color = DataType)) +
geom_point(alpha = 0.8) +
theme_minimal() +
labs(title = "PCA Plot", x = "Principal Component 1", y = "Principal Component 2", color = "Data Type")
## Vector of categorical variables that need transformation
catVars <- c("haplotype", "cyp3A5D", "sexe_r", "sexe_d",
"rejet_aigu", "event")
## Create a variable list.
vars <- c( "haplotype", "cyp3A5D", "age_r", "sexe_r", "age_d", "sexe_d",
"rejet_aigu", "TIF", "event", "delai_event", "DataType")
tableOne <- CreateTableOne(vars = vars, strata = "DataType",factorVars = catVars, data = combined_data)
tableOne2<-print(tableOne, nonnormal = c( "age_r", "age_d", "TIF", "delai_event"), printToggle=F, minMax=T)
| Original | Synthetic | p | test | |
|---|---|---|---|---|
| n | 253 | 253 | ||
| haplotype (%) | 0.022 | |||
| 1 | 97 (38.3) | 93 ( 36.8) | ||
| 2 | 123 (48.6) | 144 ( 56.9) | ||
| 3 | 33 (13.0) | 16 ( 6.3) | ||
| cyp3A5D = 2 (%) | 211 (83.4) | 217 ( 85.8) | 0.538 | |
| age_r (median [range]) | 55.00 [19.00, 78.00] | 55.09 [24.19, 73.74] | 0.487 | nonnorm |
| sexe_r = 2 (%) | 156 (61.7) | 163 ( 64.4) | 0.581 | |
| age_d (median [range]) | 40.00 [12.00, 73.00] | 39.82 [19.40, 68.49] | 0.642 | nonnorm |
| sexe_d = 2 (%) | 174 (68.8) | 185 ( 73.1) | 0.327 | |
| rejet_aigu = 2 (%) | 81 (32.0) | 73 ( 28.9) | 0.499 | |
| TIF (median [range]) | 1153.00 [303.00, 2580.00] | 1174.55 [456.35, 2362.15] | 0.560 | nonnorm |
| event = 1 (%) | 22 ( 8.7) | 21 ( 8.3) | 1.000 | |
| delai_event (median [range]) | 5.34 [0.68, 15.83] | 5.36 [0.97, 14.94] | 0.866 | nonnorm |
| DataType = Synthetic (%) | 0 ( 0.0) | 253 (100.0) | <0.001 |
summary(original)
## haplotype cyp3A5D age_r sexe_r age_d sexe_d
## autre: 97 Es : 42 Min. :19.00 F: 97 Min. :12.00 F: 79
## het :123 NEs:211 1st Qu.:44.00 M:156 1st Qu.:25.00 M:174
## hom : 33 Median :55.00 Median :40.00
## Mean :53.84 Mean :38.49
## 3rd Qu.:64.00 3rd Qu.:49.00
## Max. :78.00 Max. :73.00
## rejet_aigu TIF event delai_event
## 0:172 Min. : 303 Min. :0.00000 Min. : 0.680
## 1: 81 1st Qu.: 975 1st Qu.:0.00000 1st Qu.: 2.920
## Median :1153 Median :0.00000 Median : 5.340
## Mean :1199 Mean :0.08696 Mean : 6.044
## 3rd Qu.:1368 3rd Qu.:0.00000 3rd Qu.: 8.700
## Max. :2580 Max. :1.00000 Max. :15.830
summary(avatars_tibble_knn5)
## haplotype cyp3A5D age_r sexe_r
## Min. :1.000 Min. :1.000 Min. :24.19 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:45.93 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :55.09 Median :2.000
## Mean :1.696 Mean :1.858 Mean :53.06 Mean :1.644
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:61.18 3rd Qu.:2.000
## Max. :3.000 Max. :2.000 Max. :73.74 Max. :2.000
## age_d sexe_d rejet_aigu TIF
## Min. :19.40 Min. :1.000 Min. :1.000 Min. : 456.3
## 1st Qu.:30.60 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1053.0
## Median :39.82 Median :2.000 Median :1.000 Median :1174.5
## Mean :39.04 Mean :1.731 Mean :1.289 Mean :1192.8
## 3rd Qu.:47.46 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1304.9
## Max. :68.49 Max. :2.000 Max. :2.000 Max. :2362.1
## event delai_event
## Min. :0.000 Min. : 0.9749
## 1st Qu.:0.000 1st Qu.: 3.6906
## Median :0.000 Median : 5.3569
## Mean :0.083 Mean : 5.7325
## 3rd Qu.:0.000 3rd Qu.: 7.0216
## Max. :1.000 Max. :14.9361
# boxplots
plot_boxplot(combined_data , by ="DataType")
# histograms
# Function to create histogram for each continuous variable
plot_histograms <- function(data, var_name, group_var) {
ggplot(data, aes(x = !!sym(var_name), fill = !!sym(group_var))) +
geom_histogram(alpha = 0.5,show.legend = FALSE) +
labs(x = var_name, y = "Count") +
theme_minimal() +
ggtitle(paste(var_name))
}
# Using select_if to identify continuous variables and map to apply the function
plots <- combined_data %>%
select( -sexe_r,-sexe_d) %>%
select_if(is.numeric) %>%
names() %>%
map(~plot_histograms(combined_data, ., "DataType"))
# Optionally, print or arrange plots (e.g., using gridExtra or patchwork packages)
wrap_plots(plots)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##Correlation Analysis
cor_real <- cor(original1, use = "complete.obs")
cor_synthetic <- cor(avatars_tibble_knn5, use = "complete.obs")
# plots
ggcorrplot(cor_real, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# plots
ggcorrplot(cor_synthetic, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.2112071 3.3575352 0.3463273 3.497 0.00047 ***
## cyp3A5D -1.2323909 0.2915946 0.5567303 -2.214 0.02685 *
## age_r -0.0039521 0.9960557 0.0187880 -0.210 0.83339
## sexe_r -0.0438849 0.9570641 0.4606422 -0.095 0.92410
## age_d 0.0360206 1.0366772 0.0203668 1.769 0.07696 .
## sexe_d 0.2786636 1.3213627 0.5181402 0.538 0.59070
## rejet_aigu 1.0124644 2.7523756 0.4804379 2.107 0.03508 *
## TIF -0.0002268 0.9997732 0.0005753 -0.394 0.69345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.3575 0.2978 1.70305 6.6193
## cyp3A5D 0.2916 3.4294 0.09792 0.8683
## age_r 0.9961 1.0040 0.96004 1.0334
## sexe_r 0.9571 1.0449 0.38801 2.3607
## age_d 1.0367 0.9646 0.99611 1.0789
## sexe_d 1.3214 0.7568 0.47861 3.6481
## rejet_aigu 2.7524 0.3633 1.07339 7.0576
## TIF 0.9998 1.0002 0.99865 1.0009
##
## Concordance= 0.758 (se = 0.054 )
## Likelihood ratio test= 24.6 on 8 df, p=0.002
## Wald test = 21.09 on 8 df, p=0.007
## Score (logrank) test = 25.84 on 8 df, p=0.001
ggforest(fit_original)
# synthetique
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = avatars_tibble_knn5)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = avatars_tibble_knn5)
##
## n= 253, number of events= 21
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 2.2993957 9.9681574 0.4532528 5.073 3.91e-07 ***
## cyp3A5D -0.4985711 0.6073979 0.7870725 -0.633 0.5264
## age_r 0.0622015 1.0641767 0.0333837 1.863 0.0624 .
## sexe_r 0.5921961 1.8079544 0.5394767 1.098 0.2723
## age_d 0.0713866 1.0739964 0.0366682 1.947 0.0516 .
## sexe_d 1.9252423 6.8568098 0.7852001 2.452 0.0142 *
## rejet_aigu 0.9571739 2.6043260 0.5889792 1.625 0.1041
## TIF -0.0003885 0.9996116 0.0010378 -0.374 0.7081
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 9.9682 0.1003 4.1002 24.234
## cyp3A5D 0.6074 1.6464 0.1299 2.841
## age_r 1.0642 0.9397 0.9968 1.136
## sexe_r 1.8080 0.5531 0.6280 5.205
## age_d 1.0740 0.9311 0.9995 1.154
## sexe_d 6.8568 0.1458 1.4715 31.951
## rejet_aigu 2.6043 0.3840 0.8210 8.261
## TIF 0.9996 1.0004 0.9976 1.002
##
## Concordance= 0.874 (se = 0.028 )
## Likelihood ratio test= 43.67 on 8 df, p=7e-07
## Wald test = 36.17 on 8 df, p=2e-05
## Score (logrank) test = 44.51 on 8 df, p=5e-07
ggforest(fit_synthetique)
Allow to see which vairable would have been selected
Original
boot.stepAIC(fit_original, original1, B = 100, k=log(nrow(original1)))
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = original1)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 5.53 * df
##
## Covariates selected
## (%)
## haplotype 95
## rejet_aigu 47
## cyp3A5D 29
## age_d 21
## sexe_d 3
## Null 2
## sexe_r 2
## TIF 2
##
## Coefficients Sign
## + (%) - (%)
## age_d 100 0
## haplotype 100 0
## rejet_aigu 100 0
## sexe_d 100 0
## sexe_r 100 0
## cyp3A5D 0 100
## TIF 0 100
##
## Stat Significance
## (%)
## age_d 100
## cyp3A5D 100
## haplotype 100
## rejet_aigu 100
## sexe_d 100
## sexe_r 100
## TIF 100
##
##
## The stepAIC() for the original data-set gave
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype, data = original1)
##
## coef exp(coef) se(coef) z p
## haplotype 1.2035 3.3319 0.3276 3.674 0.000239
##
## Likelihood ratio test=14.01 on 1 df, p=0.0001822
## n= 253, number of events= 22
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r +
## age_d + sexe_d + rejet_aigu + TIF
##
## Final Model:
## Surv(delai_event, event) ~ haplotype
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 14 -24.59675 212.4446
## 2 - sexe_r 1 0.009067423 15 -24.58769 206.9203
## 3 - age_r 1 0.039034149 16 -24.54865 201.4259
## 4 - TIF 1 0.251427799 17 -24.29723 196.1439
## 5 - sexe_d 1 0.442126797 18 -23.85510 191.0527
## 6 - age_d 1 2.811491990 19 -21.04361 188.3308
## 7 - cyp3A5D 1 2.805921958 20 -18.23768 185.6033
## 8 - rejet_aigu 1 4.230950507 21 -14.00673 184.3009
synhtetic knn5
boot.stepAIC(fit_synthetique, avatars_tibble_knn5, B = 100, k=log(nrow(avatars_tibble_knn5)))
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = avatars_tibble_knn5)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 5.53 * df
##
## Covariates selected
## (%)
## haplotype 100
## sexe_d 44
## age_d 33
## rejet_aigu 26
## age_r 13
## sexe_r 10
## cyp3A5D 3
## TIF 2
##
## Coefficients Sign
## + (%) - (%)
## age_d 100.00 0.00
## age_r 100.00 0.00
## haplotype 100.00 0.00
## rejet_aigu 100.00 0.00
## sexe_d 100.00 0.00
## sexe_r 100.00 0.00
## cyp3A5D 33.33 66.67
## TIF 0.00 100.00
##
## Stat Significance
## (%)
## age_d 100.00
## age_r 100.00
## haplotype 100.00
## rejet_aigu 100.00
## sexe_r 100.00
## TIF 100.00
## sexe_d 84.09
## cyp3A5D 66.67
##
##
## The stepAIC() for the original data-set gave
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype, data = avatars_tibble_knn5)
##
## coef exp(coef) se(coef) z p
## haplotype 2.3545 10.5330 0.4374 5.383 7.32e-08
##
## Likelihood ratio test=30.11 on 1 df, p=4.087e-08
## n= 253, number of events= 21
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r +
## age_d + sexe_d + rejet_aigu + TIF
##
## Final Model:
## Surv(delai_event, event) ~ haplotype
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 13 -43.66527 188.3444
## 2 - TIF 1 0.1453500 14 -43.51992 182.9564
## 3 - cyp3A5D 1 0.3324782 15 -43.18744 177.7554
## 4 - sexe_r 1 1.0935263 16 -42.09391 173.3156
## 5 - age_r 1 2.6161991 17 -39.47771 170.3984
## 6 - rejet_aigu 1 1.4420932 18 -38.03562 166.3071
## 7 - sexe_d 1 5.2900187 19 -32.74560 166.0637
## 8 - age_d 1 2.6377284 20 -30.10787 163.1681
fit_original <- coxph(Surv(delai_event, event) ~ haplotype , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype, data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.2035 3.3319 0.3276 3.674 0.000239 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.332 0.3001 1.753 6.332
##
## Concordance= 0.682 (se = 0.044 )
## Likelihood ratio test= 14.01 on 1 df, p=2e-04
## Wald test = 13.5 on 1 df, p=2e-04
## Score (logrank) test = 14.91 on 1 df, p=1e-04
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype , data = avatars_tibble_knn5)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype, data = avatars_tibble_knn5)
##
## n= 253, number of events= 21
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 2.3545 10.5330 0.4374 5.383 7.32e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 10.53 0.09494 4.469 24.82
##
## Concordance= 0.748 (se = 0.033 )
## Likelihood ratio test= 30.11 on 1 df, p=4e-08
## Wald test = 28.98 on 1 df, p=7e-08
## Score (logrank) test = 28.45 on 1 df, p=1e-07
Allow to define the variability range of HR for a given dataset (intra dataset variability)
# Define the Cox model
cox_model <- function(data, indices) {
d <- data[indices,] # allows bootstrapping to sample the data
fit <- coxph(Surv(delai_event, event) ~ haplotype , data=d)
return(fit$coefficients)
}
# Set the seed for reproducibility
set.seed(12)
# Bootstrap the Cox model
boot_results <- boot(data=avatars_tibble_knn5, statistic=cox_model, R=100)
# Convert bootstrap results to a data frame for ggplot2
boot_hrs <- exp(boot_results$t) # Convert log(HR) to HR
hr_data <- data.frame(HR=boot_hrs[,1])
# Calculate summary statistics
summary_stats <- quantile(hr_data$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1))
names(summary_stats) <- c("Min","2.5th", "5th", "25th", "Median", "75th", "95th","97.5th","Max")
# Create the histogram
ggplot(hr_data, aes(x=HR)) +
geom_histogram(bins=30, fill="#007a86", color="black") +
# geom_vline(aes(xintercept=summary_stats["Min"]), color="red", linetype="dashed") +
geom_vline(aes(xintercept=summary_stats["25th"]), color="gray", linetype="dashed", linewidth=2) +
geom_vline(aes(xintercept=summary_stats["Median"]), color="blue", linetype="dashed", linewidth=2) +
geom_vline(aes(xintercept=summary_stats["75th"]), color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Max"]), color="purple", linetype="dashed") +
labs(title="Bootstrap Distribution of Hazard Ratios", x="Hazard Ratio (HR)", y="Frequency") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
# Print summary statistics
print(summary_stats)
## Min 2.5th 5th 25th Median 75th 95th 97.5th
## 6.517357 6.905228 7.364727 8.959359 11.096550 13.685278 20.473030 22.418775
## Max
## 44.264827
bootstraping of the original data for comparison
# Set the seed for reproducibility
set.seed(12)
# Bootstrap the Cox model
boot_results <- boot(data=original1, statistic=cox_model, R=100)
# Convert bootstrap results to a data frame for ggplot2
boot_hrs <- exp(boot_results$t) # Convert log(HR) to HR
hr_data <- data.frame(HR=boot_hrs[,1])
# Calculate summary statistics
summary_stats <- quantile(hr_data$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1))
names(summary_stats) <- c("Min","2.5th", "5th", "25th", "Median", "75th", "95th","97.5th","Max")
# Create the histogram
ggplot(hr_data, aes(x=HR)) +
geom_histogram(bins=30, fill="#007a86", color="black") +
# geom_vline(aes(xintercept=summary_stats["Min"]), color="red", linetype="dashed") +
geom_vline(aes(xintercept=summary_stats["25th"]), color="gray", linetype="dashed", linewidth=2) +
geom_vline(aes(xintercept=summary_stats["Median"]), color="blue", linetype="dashed", linewidth=2) +
geom_vline(aes(xintercept=summary_stats["75th"]), color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Max"]), color="purple", linetype="dashed") +
labs(title="Bootstrap Distribution of Hazard Ratios", x="Hazard Ratio (HR)", y="Frequency") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
# Print summary statistics
print(summary_stats)
## Min 2.5th 5th 25th Median 75th 95th 97.5th
## 1.761377 1.935648 2.149621 2.864674 3.404484 4.246031 7.064679 7.651689
## Max
## 12.060006
km_original <- survfit(Surv(delai_event, event) ~ haplotype, data = original)
km_original_plot <- ggsurvplot(
km_original,
data = original,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_original_plot
km_synthetique <- survfit(Surv(delai_event, event) ~ haplotype, data = avatars_tibble_factor_knn5)
km_synthetique_avatar_5 <- ggsurvplot(
km_synthetique,
data = avatars_tibble_factor_knn5,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique_avatar_5
Plots original & synthetic combined
## combine data
combined_df <- rbind(original %>% mutate(group = "original"), avatars_tibble_factor_knn5 %>% mutate(group = "synthetic")) %>% mutate(combined_haplotype = str_c(haplotype,"_", group ))
## fit the model
km_combined <- survfit(Surv(delai_event, event) ~ combined_haplotype, data = combined_df)
# plot
ggsurvplot(fit = km_combined,
data = combined_df,
size = 1, # change line size
conf.int = FALSE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.35, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
pm_knn5 <- combined_df %>% select(haplotype:delai_event, group) %>%
rename(acute_rejection = rejet_aigu, sex_r = sexe_r, sex_d = sexe_d) %>%
ggpairs(
ggplot2::aes(colour = group,alpha = 0.5),
upper = list(continuous = wrap("cor", size = 1.5)),
lower=list(combo=wrap("facethist", binwidth=0.5))) +
theme(strip.text.x = element_text(size = 5),
strip.text.y = element_text(size = 5),axis.text = element_text(size = 5))
pm_knn5
ggsave("Figure1_graft_loss.pdf")
## Saving 7 x 5 in image
# ggsave("comparaison_distribution_knn5.pdf")
Allow to define the variability range of HR for different Avatar generated with different seed but the same knn (inter dataset variability)
# Assuming all your existing functions and necessary libraries are loaded
run_model_with_seed <- function(seed_value) {
# Number of neighbors
k <- 5 # Adjust this based on your requirement
pca_transformed_data <- pca$x
knn_result <- get.knn(pca_transformed_data, k)
generate_avatar_weights <- function(knn_result, pca_transformed_data, k) {
n <- nrow(pca_transformed_data)
avatar_weights <- matrix(nrow = n, ncol = k)
for (i in 1:n) {
# Step 1: Inverse of Distances
distances <- sqrt(rowSums((pca_transformed_data[knn_result$nn.index[i, ], ] - pca_transformed_data[i, ])^2))
inverse_distances <- 1 / distances
# Step 2: Random Weights
random_weights <- rexp(k, rate = 1)
# Step 3: Contribution Factors
shuffled_indices <- sample(k)
contribution_factors <- 1 / (2^shuffled_indices)
# Step 4: Calculate Weights
weights <- inverse_distances * random_weights * contribution_factors
# Step 5: Normalize Weights
normalized_weights <- weights / sum(weights)
avatar_weights[i, ] <- normalized_weights
}
return(avatar_weights)
}
# Generate avatar weights
set.seed(seed_value)
avatar_weights <- generate_avatar_weights(knn_result, pca_transformed_data, k)
# Function to generate avatars in PCA space based on weights
generate_avatars_pca_space <- function(pca_transformed_data, knn_indices, weights) {
n <- nrow(pca_transformed_data)
avatars_pca <- matrix(nrow = n, ncol = ncol(pca_transformed_data))
for (i in 1:n) {
weighted_avatars <- pca_transformed_data[knn_indices[i, ], ] * weights[i, ]
avatars_pca[i, ] <- colSums(weighted_avatars)
}
return(avatars_pca)
}
# Generate avatars in PCA space
avatars_pca_space <- generate_avatars_pca_space(pca_transformed_data, knn_result$nn.index, avatar_weights)
# Inverse PCA transformation
inverse_pca <- function(pca_object, pca_data) {
return(pca_data %*% t(pca_object$rotation) + matrix(pca_object$center, nrow = nrow(pca_data), ncol = ncol(pca_object$rotation), byrow = TRUE))
}
avatars_original_scale <- inverse_pca(pca, avatars_pca_space)
# Inverse normalization (if the original data was normalized)
avatars_rescaled <- scale(avatars_original_scale, center = FALSE, scale = 1/attr(data_normalized, "scaled:scale"))
avatars_rescaled <- sweep(avatars_rescaled, 2, attr(data_normalized, "scaled:center"), "+")
avatars_tibble_knn5 <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
)
avatars_tibble_factor_knn5 <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
) %>%
mutate(haplotype = as.factor(haplotype),
cyp3A5D = as.factor(cyp3A5D),
sexe_r = as.factor(sexe_r),
sexe_d = as.factor(sexe_d),
# CYP3A4_1B = as.factor(CYP3A4_1B),
# MDR1_C1236T = as.factor(MDR1_C1236T),
# MDR1_G2677T = as.factor(MDR1_G2677T),
# MDR1_C3435T = as.factor(MDR1_C3435T),
rejet_aigu = as.factor(rejet_aigu))
# Finally, fit the Cox model
fit <- coxph(Surv(delai_event, event) ~ haplotype ,
data = avatars_tibble_knn5)
# Calculate confidence intervals
ci <- confint(fit)
return(list(fit = fit, ci = ci))
}
extract_hrs_and_cis <- function(model_output) {
coefs <- model_output$fit$coefficients
ci <- model_output$ci
hr <- exp(coefs)
ci_lower <- exp(ci[,"2.5 %"])
ci_upper <- exp(ci[,"97.5 %"])
return(data.frame(variable = names(hr), hr = hr, ci_lower = ci_lower, ci_upper = ci_upper))
}
# Generate a list of seed values
seed_values <- sample(x=100) # Modify this if you need different seed values
# Apply the algorithm with different seed values
model_results <- map(seed_values, run_model_with_seed)
# Extract HR and CI from model results
extracted_results <- map(model_results, extract_hrs_and_cis)
# Combine results into a single data frame
combined_results <- bind_rows(extracted_results)
# Calculate median HR and CI for each variable
# aggregate_metrics <- combined_results %>%
# group_by(variable) %>%
# summarize(
# median_hr = median(hr),
# median_ci_lower = median(ci_lower),
# median_ci_upper = median(ci_upper)
# )
#
# aggregate_metrics
# Calculate the specified percentiles for HRs for each variable
percentile_metrics <- combined_results %>%
group_by(variable) %>%
summarize(
percentile_0 = quantile(hr, probs = 0),
percentile_5 = quantile(hr, probs = 0.05),
percentile_25 = quantile(hr, probs = 0.25),
percentile_50 = quantile(hr, probs = 0.5),
percentile_75 = quantile(hr, probs = 0.75),
percentile_95 = quantile(hr, probs = 0.95),
percentile_100 = quantile(hr, probs = 1)
) %>%
pivot_longer(-variable, names_to = "Percentile_HR", values_to = "Value_HR") %>%
mutate(Value_HR = round(Value_HR, 2))
# percentile_metrics
# datatable(percentile_metrics)
knitr::kable(percentile_metrics %>% mutate(Value_HR = round(Value_HR, 2)), "simple")
| variable | Percentile_HR | Value_HR |
|---|---|---|
| haplotype | percentile_0 | 3.20 |
| haplotype | percentile_5 | 4.07 |
| haplotype | percentile_25 | 5.25 |
| haplotype | percentile_50 | 6.17 |
| haplotype | percentile_75 | 7.30 |
| haplotype | percentile_95 | 9.35 |
| haplotype | percentile_100 | 24.91 |
Allow to define the variability range of HR for different Avatar generated with different seed and different values of knn
# # Assuming all necessary functions (PCA transformation, KNN, avatar generation, etc.) are defined
#
# run_analysis_for_k_and_seed <- function(k, seed_value) {
#
#
# pca_transformed_data <- pca$x
# knn_result <- get.knn(pca_transformed_data, k)
#
# generate_avatar_weights <- function(knn_result, pca_transformed_data, k) {
# n <- nrow(pca_transformed_data)
# avatar_weights <- matrix(nrow = n, ncol = k)
#
# for (i in 1:n) {
# # Step 1: Inverse of Distances
# distances <- sqrt(rowSums((pca_transformed_data[knn_result$nn.index[i, ], ] - pca_transformed_data[i, ])^2))
# inverse_distances <- 1 / distances
#
# # Step 2: Random Weights
#
# random_weights <- rexp(k, rate = 1)
#
# # Step 3: Contribution Factors
#
# shuffled_indices <- sample(k)
# contribution_factors <- 1 / (2^shuffled_indices)
#
# # Step 4: Calculate Weights
# weights <- inverse_distances * random_weights * contribution_factors
#
# # Step 5: Normalize Weights
# normalized_weights <- weights / sum(weights)
#
# avatar_weights[i, ] <- normalized_weights
# }
#
# return(avatar_weights)
# }
#
#
#
# # Generate avatar weights
# set.seed(seed_value)
# avatar_weights <- generate_avatar_weights(knn_result, pca_transformed_data, k)
#
#
# # Function to generate avatars in PCA space based on weights
# generate_avatars_pca_space <- function(pca_transformed_data, knn_indices, weights) {
# n <- nrow(pca_transformed_data)
# avatars_pca <- matrix(nrow = n, ncol = ncol(pca_transformed_data))
#
# for (i in 1:n) {
# weighted_avatars <- pca_transformed_data[knn_indices[i, ], ] * weights[i, ]
# avatars_pca[i, ] <- colSums(weighted_avatars)
# }
#
# return(avatars_pca)
# }
# # Generate avatars in PCA space
# avatars_pca_space <- generate_avatars_pca_space(pca_transformed_data, knn_result$nn.index, avatar_weights)
#
# # Assuming 'aids_pca' is the PCA object and 'avatars_pca_space' contains the avatars in PCA space
# # Inverse PCA transformation
# inverse_pca <- function(pca_object, pca_data) {
# return(pca_data %*% t(pca_object$rotation) + matrix(pca_object$center, nrow = nrow(pca_data), ncol = ncol(pca_object$rotation), byrow = TRUE))
# }
# avatars_original_scale <- inverse_pca(pca, avatars_pca_space)
#
#
# # Inverse normalization (if the original data was normalized)
# avatars_rescaled <- scale(avatars_original_scale, center = FALSE, scale = 1/attr(data_normalized, "scaled:scale"))
# avatars_rescaled <- sweep(avatars_rescaled, 2, attr(data_normalized, "scaled:center"), "+")
#
# avatars_tibble_knn <- as_tibble(avatars_rescaled) %>%
# mutate(haplotype = round(haplotype, digits=0),
# cyp3A5D = round(cyp3A5D, digits=0),
# sexe_r = round(sexe_r , digits=0),
# sexe_d = round(sexe_d , digits=0),
# rejet_aigu = round(rejet_aigu , digits=0),
# event = round(event, digits=0)
# # CYP3A4_1B = round(CYP3A4_1B, digits=0),
# # MDR1_C1236T = round(MDR1_C1236T, digits=0),
# # MDR1_G2677T = round(MDR1_G2677T, digits=0),
# # MDR1_C3435T = round(MDR1_C3435T, digits=0)
# )
#
# avatars_tibble_factor_knn <- as_tibble(avatars_rescaled) %>%
# mutate(haplotype = round(haplotype, digits=0),
# cyp3A5D = round(cyp3A5D, digits=0),
# sexe_r = round(sexe_r , digits=0),
# sexe_d = round(sexe_d , digits=0),
# rejet_aigu = round(rejet_aigu , digits=0),
# event = round(event, digits=0)
# # CYP3A4_1B = round(CYP3A4_1B, digits=0),
# # MDR1_C1236T = round(MDR1_C1236T, digits=0),
# # MDR1_G2677T = round(MDR1_G2677T, digits=0),
# # MDR1_C3435T = round(MDR1_C3435T, digits=0)
# ) %>%
# mutate(haplotype = as.factor(haplotype),
# cyp3A5D = as.factor(cyp3A5D),
# sexe_r = as.factor(sexe_r),
# sexe_d = as.factor(sexe_d),
# # CYP3A4_1B = as.factor(CYP3A4_1B),
# # MDR1_C1236T = as.factor(MDR1_C1236T),
# # MDR1_G2677T = as.factor(MDR1_G2677T),
# # MDR1_C3435T = as.factor(MDR1_C3435T),
# rejet_aigu = as.factor(rejet_aigu))
#
#
# # Finally, fit the Cox model
# fit <- coxph(Surv(delai_event, event) ~ haplotype , data = avatars_tibble_knn)
#
#
# # Calculate confidence intervals
# ci <- confint(fit)
#
# return(list(fit = fit, ci = ci))
# }
#
# extract_hrs_and_cis <- function(model_output) {
# coefs <- model_output$fit$coefficients
# ci <- model_output$ci
#
# hr <- exp(coefs)
# ci_lower <- exp(ci[,"2.5 %"])
# ci_upper <- exp(ci[,"97.5 %"])
#
# return(data.frame(variable = names(hr), hr = hr, ci_lower = ci_lower, ci_upper = ci_upper))
# }
#
# run_for_k_values <- function(k) {
# seed_values <- sample(x = 100)
# model_results <- map(seed_values, ~run_analysis_for_k_and_seed(k, .x))
# extracted_results <- map(model_results, extract_hrs_and_cis)
# combined_results <- bind_rows(extracted_results)
#
# aggregate_metrics <- combined_results %>%
# group_by(variable) %>%
# summarize(
# percentile_0 = quantile(hr, probs = 0, na.rm = TRUE),
# percentile_5 = quantile(hr, probs = 0.05, na.rm = TRUE),
# percentile_25 = quantile(hr, probs = 0.25, na.rm = TRUE),
# percentile_50 = quantile(hr, probs = 0.5, na.rm = TRUE),
# percentile_75 = quantile(hr, probs = 0.75, na.rm = TRUE),
# percentile_95 = quantile(hr, probs = 0.95, na.rm = TRUE),
# percentile_100 = quantile(hr, probs = 1, na.rm = TRUE)
# )
#
#
#
# return(aggregate_metrics)
# }
#
# # Define different k values
# k_values <- c(3, 5, 10, 15, 20, 50)
#
# # Apply the analysis for each k value
# results_for_k_values <- map(k_values, run_for_k_values)
# names(results_for_k_values) <- paste("K =", k_values)
#
# results_for_k_values
#
# # datatable(percentile_metrics)
# knitr::kable(results_for_k_values, "simple")
We investigate the effect of data augmentaiotn with a defined seed and knn=5
data_augment_avatar <- function(x) {
data_normalized <- scale(original1)
pca <- prcomp(data_normalized, scale. = FALSE)# pour selecitonner le nombre de cp rank. = 3
# Number of neighbors
k <- 5 # Adjust this based on your requirement
pca_transformed_data <- pca$x
knn_result <- get.knn(pca_transformed_data, k)
generate_avatar_weights <- function(knn_result, pca_transformed_data, k) {
n <- nrow(pca_transformed_data)
avatar_weights <- matrix(nrow = n, ncol = k)
for (i in 1:n) {
# Step 1: Inverse of Distances
distances <- sqrt(rowSums((pca_transformed_data[knn_result$nn.index[i, ], ] - pca_transformed_data[i, ])^2))
inverse_distances <- 1 / distances
# Step 2: Random Weights
random_weights <- rexp(k, rate = 1)
# Step 3: Contribution Factors
shuffled_indices <- sample(k)
contribution_factors <- 1 / (2^shuffled_indices)
# Step 4: Calculate Weights
weights <- inverse_distances * random_weights * contribution_factors
# Step 5: Normalize Weights
normalized_weights <- weights / sum(weights)
avatar_weights[i, ] <- normalized_weights
}
return(avatar_weights)
}
# Generate avatar weights
set.seed( str_c(1,x))
avatar_weights <- generate_avatar_weights(knn_result, pca_transformed_data, k)
# Assuming pca_result, avatar_weights, knn_result$nn.index, and pca_transformed_data are already defined
# Function to generate avatars in PCA space based on weights
generate_avatars_pca_space <- function(pca_transformed_data, knn_indices, weights) {
n <- nrow(pca_transformed_data)
avatars_pca <- matrix(nrow = n, ncol = ncol(pca_transformed_data))
for (i in 1:n) {
weighted_avatars <- pca_transformed_data[knn_indices[i, ], ] * weights[i, ]
avatars_pca[i, ] <- colSums(weighted_avatars)
}
return(avatars_pca)
}
# Generate avatars in PCA space
avatars_pca_space <- generate_avatars_pca_space(pca_transformed_data, knn_result$nn.index, avatar_weights)
# Assuming 'aids_pca' is the PCA object and 'avatars_pca_space' contains the avatars in PCA space
# Inverse PCA transformation
inverse_pca <- function(pca_object, pca_data) {
return(pca_data %*% t(pca_object$rotation) + matrix(pca_object$center, nrow = nrow(pca_data), ncol = ncol(pca_object$rotation), byrow = TRUE))
}
avatars_original_scale <- inverse_pca(pca, avatars_pca_space)
# Assuming 'aids_data_normalized' contains the scaling attributes of the original data
# Inverse normalization (if the original data was normalized)
avatars_rescaled <- scale(avatars_original_scale, center = FALSE, scale = 1/attr(data_normalized, "scaled:scale"))
avatars_rescaled <- sweep(avatars_rescaled, 2, attr(data_normalized, "scaled:center"), "+")
avatars_tibble <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
)
}
iteration <- c(1:4)
augmented_data_5 <- map_dfr(iteration, data_augment_avatar, .id = "iter_")
augmented_data_5 <- augmented_data_5 %>%
select(-iter_)
augmented_data_5_factor_knn5 <- augmented_data_5 %>%
mutate(haplotype = as.factor(haplotype),
cyp3A5D = as.factor(cyp3A5D),
sexe_r = as.factor(sexe_r),
sexe_d = as.factor(sexe_d),
# CYP3A4_1B = as.factor(CYP3A4_1B),
# MDR1_C1236T = as.factor(MDR1_C1236T),
# MDR1_G2677T = as.factor(MDR1_G2677T),
# MDR1_C3435T = as.factor(MDR1_C3435T),
rejet_aigu = as.factor(rejet_aigu))
Plot of the synthetic and original in the latent space
# Combine original and synthetic data for visualization
combined_data <- rbind(
original1 %>% mutate(DataType = 'Original'),
augmented_data_5 %>% mutate(DataType = 'Synthetic')
)
# Perform PCA on combined data
combined_data_normalized <- scale(combined_data[, -which(names(combined_data) %in% c("DataType", "id"))])
combined_pca <- prcomp(combined_data_normalized, scale. = FALSE)
# Extract the first two principal components
combined_pca_data <- data.frame(combined_pca$x[, 1:2])
combined_pca_data$DataType <- combined_data$DataType
# Plot PCA with color differentiation
ggplot(combined_pca_data, aes(x = PC1, y = PC2, color = DataType)) +
geom_point(alpha = 0.5) +
theme_minimal() +
labs(title = "PCA Plot", x = "Principal Component 1", y = "Principal Component 2", color = "Data Type")
Export data augmented knn = 5
write_csv(augmented_data_5, file = "avatar_sfpt_knn5_data_augmented.csv")
## Vector of categorical variables that need transformation
catVars <- c("haplotype", "cyp3A5D", "sexe_r", "sexe_d",
"rejet_aigu", "event")
## Create a variable list.
vars <- c( "haplotype", "cyp3A5D", "age_r", "sexe_r", "age_d", "sexe_d",
"rejet_aigu", "TIF", "event", "delai_event", "DataType")
tableOne <- CreateTableOne(vars = vars, strata = "DataType",factorVars = catVars, data = combined_data)
tableOne2<-print(tableOne, nonnormal = c( "age_r", "age_d", "TIF", "delai_event"), printToggle=F, minMax=T)
| Original | Synthetic | p | test | |
|---|---|---|---|---|
| n | 253 | 1012 | ||
| haplotype (%) | <0.001 | |||
| 1 | 97 (38.3) | 373 ( 36.9) | ||
| 2 | 123 (48.6) | 577 ( 57.0) | ||
| 3 | 33 (13.0) | 62 ( 6.1) | ||
| cyp3A5D = 2 (%) | 211 (83.4) | 859 ( 84.9) | 0.627 | |
| age_r (median [range]) | 55.00 [19.00, 78.00] | 55.42 [23.08, 77.36] | 0.722 | nonnorm |
| sexe_r = 2 (%) | 156 (61.7) | 656 ( 64.8) | 0.387 | |
| age_d (median [range]) | 40.00 [12.00, 73.00] | 39.96 [15.04, 68.49] | 0.382 | nonnorm |
| sexe_d = 2 (%) | 174 (68.8) | 717 ( 70.8) | 0.569 | |
| rejet_aigu = 2 (%) | 81 (32.0) | 294 ( 29.1) | 0.397 | |
| TIF (median [range]) | 1153.00 [303.00, 2580.00] | 1158.71 [456.35, 2362.15] | 0.895 | nonnorm |
| event = 1 (%) | 22 ( 8.7) | 86 ( 8.5) | 1.000 | |
| delai_event (median [range]) | 5.34 [0.68, 15.83] | 5.45 [0.97, 14.94] | 0.516 | nonnorm |
| DataType = Synthetic (%) | 0 ( 0.0) | 1012 (100.0) | <0.001 |
summary(original)
## haplotype cyp3A5D age_r sexe_r age_d sexe_d
## autre: 97 Es : 42 Min. :19.00 F: 97 Min. :12.00 F: 79
## het :123 NEs:211 1st Qu.:44.00 M:156 1st Qu.:25.00 M:174
## hom : 33 Median :55.00 Median :40.00
## Mean :53.84 Mean :38.49
## 3rd Qu.:64.00 3rd Qu.:49.00
## Max. :78.00 Max. :73.00
## rejet_aigu TIF event delai_event
## 0:172 Min. : 303 Min. :0.00000 Min. : 0.680
## 1: 81 1st Qu.: 975 1st Qu.:0.00000 1st Qu.: 2.920
## Median :1153 Median :0.00000 Median : 5.340
## Mean :1199 Mean :0.08696 Mean : 6.044
## 3rd Qu.:1368 3rd Qu.:0.00000 3rd Qu.: 8.700
## Max. :2580 Max. :1.00000 Max. :15.830
summary(augmented_data_5)
## haplotype cyp3A5D age_r sexe_r
## Min. :1.000 Min. :1.000 Min. :23.08 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:46.18 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :55.42 Median :2.000
## Mean :1.693 Mean :1.849 Mean :53.57 Mean :1.648
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:62.12 3rd Qu.:2.000
## Max. :3.000 Max. :2.000 Max. :77.36 Max. :2.000
## age_d sexe_d rejet_aigu TIF
## Min. :15.04 Min. :1.000 Min. :1.000 Min. : 456.3
## 1st Qu.:30.19 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1045.9
## Median :39.96 Median :2.000 Median :1.000 Median :1158.7
## Mean :39.29 Mean :1.708 Mean :1.291 Mean :1174.6
## 3rd Qu.:47.90 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1278.0
## Max. :68.49 Max. :2.000 Max. :2.000 Max. :2362.1
## event delai_event
## Min. :0.00000 Min. : 0.9673
## 1st Qu.:0.00000 1st Qu.: 3.8059
## Median :0.00000 Median : 5.4479
## Mean :0.08498 Mean : 5.8418
## 3rd Qu.:0.00000 3rd Qu.: 7.3332
## Max. :1.00000 Max. :14.9361
# boxplots
plot_boxplot(combined_data , by ="DataType")
# histograms
# Function to create histogram for each continuous variable
plot_histograms <- function(data, var_name, group_var) {
ggplot(data, aes(x = !!sym(var_name), fill = !!sym(group_var))) +
geom_histogram(alpha = 0.5,show.legend = FALSE) +
labs(x = var_name, y = "Count") +
theme_minimal() +
ggtitle(paste(var_name))
}
# Using select_if to identify continuous variables and map to apply the function
plots <- combined_data %>%
select( -sexe_r,-sexe_d) %>%
select_if(is.numeric) %>%
names() %>%
map(~plot_histograms(combined_data, ., "DataType"))
# Optionally, print or arrange plots (e.g., using gridExtra or patchwork packages)
wrap_plots(plots)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##Correlation Analysis
cor_real <- cor(original1, use = "complete.obs")
cor_synthetic <- cor(augmented_data_5, use = "complete.obs")
# plots
ggcorrplot(cor_real, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# plots
ggcorrplot(cor_synthetic, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.2112071 3.3575352 0.3463273 3.497 0.00047 ***
## cyp3A5D -1.2323909 0.2915946 0.5567303 -2.214 0.02685 *
## age_r -0.0039521 0.9960557 0.0187880 -0.210 0.83339
## sexe_r -0.0438849 0.9570641 0.4606422 -0.095 0.92410
## age_d 0.0360206 1.0366772 0.0203668 1.769 0.07696 .
## sexe_d 0.2786636 1.3213627 0.5181402 0.538 0.59070
## rejet_aigu 1.0124644 2.7523756 0.4804379 2.107 0.03508 *
## TIF -0.0002268 0.9997732 0.0005753 -0.394 0.69345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.3575 0.2978 1.70305 6.6193
## cyp3A5D 0.2916 3.4294 0.09792 0.8683
## age_r 0.9961 1.0040 0.96004 1.0334
## sexe_r 0.9571 1.0449 0.38801 2.3607
## age_d 1.0367 0.9646 0.99611 1.0789
## sexe_d 1.3214 0.7568 0.47861 3.6481
## rejet_aigu 2.7524 0.3633 1.07339 7.0576
## TIF 0.9998 1.0002 0.99865 1.0009
##
## Concordance= 0.758 (se = 0.054 )
## Likelihood ratio test= 24.6 on 8 df, p=0.002
## Wald test = 21.09 on 8 df, p=0.007
## Score (logrank) test = 25.84 on 8 df, p=0.001
ggforest(fit_original)
# synthetique
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = augmented_data_5)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = augmented_data_5)
##
## n= 1012, number of events= 86
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.8218542 6.1833132 0.2007713 9.074 < 2e-16 ***
## cyp3A5D -1.0012032 0.3674371 0.3486473 -2.872 0.00408 **
## age_r 0.0442697 1.0452643 0.0135693 3.263 0.00110 **
## sexe_r 0.2190860 1.2449383 0.2474680 0.885 0.37599
## age_d 0.0392892 1.0400712 0.0146711 2.678 0.00741 **
## sexe_d 2.2586712 9.5703635 0.3732126 6.052 1.43e-09 ***
## rejet_aigu 1.1536414 3.1697141 0.2689862 4.289 1.80e-05 ***
## TIF -0.0009711 0.9990294 0.0005792 -1.677 0.09362 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 6.1833 0.1617 4.1718 9.1647
## cyp3A5D 0.3674 2.7216 0.1855 0.7277
## age_r 1.0453 0.9567 1.0178 1.0734
## sexe_r 1.2449 0.8033 0.7665 2.0221
## age_d 1.0401 0.9615 1.0106 1.0704
## sexe_d 9.5704 0.1045 4.6052 19.8888
## rejet_aigu 3.1697 0.3155 1.8709 5.3701
## TIF 0.9990 1.0010 0.9979 1.0002
##
## Concordance= 0.854 (se = 0.017 )
## Likelihood ratio test= 156.1 on 8 df, p=<2e-16
## Wald test = 136.3 on 8 df, p=<2e-16
## Score (logrank) test = 167.8 on 8 df, p=<2e-16
ggforest(fit_synthetique)
BootstepAIC augmented synthetic knn5
boot.stepAIC(fit_synthetique, augmented_data_5, B = 100, k=log(nrow(augmented_data_5)))
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = augmented_data_5)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 6.92 * df
##
## Covariates selected
## (%)
## haplotype 100
## sexe_d 100
## rejet_aigu 85
## age_r 67
## age_d 59
## cyp3A5D 36
## TIF 15
## sexe_r 7
##
## Coefficients Sign
## + (%) - (%)
## age_d 100 0
## age_r 100 0
## haplotype 100 0
## rejet_aigu 100 0
## sexe_d 100 0
## sexe_r 100 0
## cyp3A5D 0 100
## TIF 0 100
##
## Stat Significance
## (%)
## age_d 100
## age_r 100
## cyp3A5D 100
## haplotype 100
## rejet_aigu 100
## sexe_d 100
## sexe_r 100
## TIF 100
##
##
## The stepAIC() for the original data-set gave
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + age_r +
## sexe_d + rejet_aigu, data = augmented_data_5)
##
## coef exp(coef) se(coef) z p
## haplotype 1.86461 6.45345 0.19723 9.454 < 2e-16
## age_r 0.03991 1.04072 0.01287 3.100 0.00193
## sexe_d 1.73449 5.66604 0.33156 5.231 1.68e-07
## rejet_aigu 1.18776 3.27974 0.26848 4.424 9.69e-06
##
## Likelihood ratio test=139.8 on 4 df, p=< 2.2e-16
## n= 1012, number of events= 86
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r +
## age_d + sexe_d + rejet_aigu + TIF
##
## Final Model:
## Surv(delai_event, event) ~ haplotype + age_r + sexe_d + rejet_aigu
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 78 -156.1183 885.8237
## 2 - sexe_r 1 0.7927363 79 -155.3256 879.6968
## 3 - TIF 1 3.1531373 80 -152.1724 875.9302
## 4 - cyp3A5D 1 5.9361800 81 -146.2362 874.9467
## 5 - age_d 1 6.4243828 82 -139.8119 874.4514
Final model original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + rejet_aigu , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + rejet_aigu,
## data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.1681 3.2160 0.3261 3.582 0.000341 ***
## rejet_aigu 0.9238 2.5188 0.4661 1.982 0.047482 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.216 0.3109 1.697 6.094
## rejet_aigu 2.519 0.3970 1.010 6.280
##
## Concordance= 0.732 (se = 0.05 )
## Likelihood ratio test= 18.24 on 2 df, p=1e-04
## Wald test = 17.44 on 2 df, p=2e-04
## Score (logrank) test = 19.29 on 2 df, p=6e-05
Final model synthetic
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + age_r + sexe_d + rejet_aigu , data = augmented_data_5)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + age_r +
## sexe_d + rejet_aigu, data = augmented_data_5)
##
## n= 1012, number of events= 86
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.86461 6.45345 0.19723 9.454 < 2e-16 ***
## age_r 0.03991 1.04072 0.01287 3.100 0.00193 **
## sexe_d 1.73449 5.66604 0.33156 5.231 1.68e-07 ***
## rejet_aigu 1.18776 3.27974 0.26848 4.424 9.69e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 6.453 0.1550 4.384 9.499
## age_r 1.041 0.9609 1.015 1.067
## sexe_d 5.666 0.1765 2.958 10.852
## rejet_aigu 3.280 0.3049 1.938 5.551
##
## Concordance= 0.849 (se = 0.019 )
## Likelihood ratio test= 139.8 on 4 df, p=<2e-16
## Wald test = 129.7 on 4 df, p=<2e-16
## Score (logrank) test = 152.8 on 4 df, p=<2e-16
km_original <- survfit(Surv(delai_event, event) ~ haplotype, data = original)
ggsurvplot(
km_original,
data = original,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique <- survfit(Surv(delai_event, event) ~ haplotype, data = augmented_data_5_factor_knn5)
km_synthetique_avatar_5_augmented <- ggsurvplot(
km_synthetique,
data = augmented_data_5_factor_knn5,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique_avatar_5_augmented
Plots original & synthetic combined
## combine data
combined_df <- rbind(original %>% mutate(group = "original"), augmented_data_5_factor_knn5 %>% mutate(group = "synthetic")) %>% mutate(combined_haplotype = str_c(haplotype,"_", group ))
## fit the model
km_combined <- survfit(Surv(delai_event, event) ~ combined_haplotype, data = combined_df)
# plot
ggsurvplot(fit = km_combined,
data = combined_df,
size = 1, # change line size
conf.int = FALSE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.35, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
Graphical exploraiotn of distribution
library(GGally)
pm_knn5_augmented <- combined_df %>% select(haplotype:delai_event, group) %>% ggpairs(
ggplot2::aes(colour = group,alpha = 0.5),
upper = list(continuous = wrap("cor", size = 1.5)),
lower=list(combo=wrap("facethist", binwidth=0.5))) +
theme(strip.text.x = element_text(size = 5),
strip.text.y = element_text(size = 5),axis.text = element_text(size = 5))
pm_knn5_augmented
# ggsave("comparaison_distribution_augmented_knn5.pdf")
Allow to define the variability range of HR for a given dataset (intra dataset variability)
# Define the Cox model
cox_model <- function(data, indices) {
d <- data[indices,] # allows bootstrapping to sample the data
fit <- coxph(Surv(delai_event, event) ~ haplotype + age_r + sexe_d + rejet_aigu , data=d)
return(fit$coefficients)
}
# Set the seed for reproducibility
set.seed(12)
# Bootstrap the Cox model
boot_results <- boot(data=augmented_data_5, statistic=cox_model, R=100)
# Convert bootstrap results to a data frame for ggplot2
boot_hrs <- exp(boot_results$t) # Convert log(HR) to HR
hr_data_haplo <- data.frame(HR=boot_hrs[,1])
hr_data_age_r <- data.frame(HR=boot_hrs[,2])
hr_data_sexe_d <- data.frame(HR=boot_hrs[,3])
hr_data_rejet_aigu <- data.frame(HR=boot_hrs[,4])
# Calculate summary statistics
summary_stats <- quantile(hr_data_haplo$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) %>%
bind_rows(quantile(hr_data_age_r$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) ) %>%
bind_rows(quantile(hr_data_sexe_d$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) ) %>%
bind_rows(quantile(hr_data_rejet_aigu$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) )
names(summary_stats) <- c("Min","2.5th", "5th", "25th", "Median", "75th", "95th","97.5th","Max")
# Create the histogram
ggplot(hr_data_haplo, aes(x=HR)) +
geom_histogram(bins=30, fill="#007a86", color="black") +
# # geom_vline(aes(xintercept=summary_stats["Min"]), color="red", linetype="dashed") +
# geom_vline(aes(xintercept=summary_stats["25th"][[1]][[1]]), color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Median"][[1]][[1]]), color="blue", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["75th"])[[1]][[1]], color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Max"]), color="purple", linetype="dashed") +
labs(title="Bootstrap Distribution of Hazard Ratios", x="Hazard Ratio (HR)", y="Frequency") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
# Print summary statistics
knitr::kable(summary_stats, "simple")
| Min | 2.5th | 5th | 25th | Median | 75th | 95th | 97.5th | Max |
|---|---|---|---|---|---|---|---|---|
| 4.519891 | 5.175495 | 5.293237 | 5.976295 | 6.677334 | 7.692327 | 8.682031 | 8.968824 | 9.451237 |
| 1.012048 | 1.022511 | 1.024808 | 1.035579 | 1.039754 | 1.046965 | 1.057784 | 1.063154 | 1.068696 |
| 2.167422 | 3.325784 | 3.557883 | 5.048900 | 6.003292 | 7.209618 | 8.929283 | 11.123687 | 20.182587 |
| 1.533517 | 1.826242 | 1.947511 | 2.620763 | 3.133227 | 3.898546 | 5.604837 | 6.093282 | 7.779243 |
Allow to define the inter variability range of HR for augmented knn=5 (inter dataset variability) by using 100 bootraps
# Assuming all your existing functions and necessary libraries are loaded
run_model_with_seed <- function(seed_value) {
# augmentaiotn of data
data_augment_avatar <- function(x) {
data_normalized <- scale(original1)
pca <- prcomp(data_normalized, scale. = FALSE)# pour selecitonner le nombre de cp rank. = 3
# Number of neighbors
k <- 5 # Adjust this based on your requirement
pca_transformed_data <- pca$x
knn_result <- get.knn(pca_transformed_data, k)
generate_avatar_weights <- function(knn_result, pca_transformed_data, k) {
n <- nrow(pca_transformed_data)
avatar_weights <- matrix(nrow = n, ncol = k)
for (i in 1:n) {
# Step 1: Inverse of Distances
distances <- sqrt(rowSums((pca_transformed_data[knn_result$nn.index[i, ], ] - pca_transformed_data[i, ])^2))
inverse_distances <- 1 / distances
# Step 2: Random Weights
random_weights <- rexp(k, rate = 1)
# Step 3: Contribution Factors
shuffled_indices <- sample(k)
contribution_factors <- 1 / (2^shuffled_indices)
# Step 4: Calculate Weights
weights <- inverse_distances * random_weights * contribution_factors
# Step 5: Normalize Weights
normalized_weights <- weights / sum(weights)
avatar_weights[i, ] <- normalized_weights
}
return(avatar_weights)
}
# Generate avatar weights
avatar_weights <- generate_avatar_weights(knn_result, pca_transformed_data, k)
# Assuming pca_result, avatar_weights, knn_result$nn.index, and pca_transformed_data are already defined
# Function to generate avatars in PCA space based on weights
generate_avatars_pca_space <- function(pca_transformed_data, knn_indices, weights) {
n <- nrow(pca_transformed_data)
avatars_pca <- matrix(nrow = n, ncol = ncol(pca_transformed_data))
for (i in 1:n) {
weighted_avatars <- pca_transformed_data[knn_indices[i, ], ] * weights[i, ]
avatars_pca[i, ] <- colSums(weighted_avatars)
}
return(avatars_pca)
}
# Generate avatars in PCA space
avatars_pca_space <- generate_avatars_pca_space(pca_transformed_data, knn_result$nn.index, avatar_weights)
# Assuming 'aids_pca' is the PCA object and 'avatars_pca_space' contains the avatars in PCA space
# Inverse PCA transformation
inverse_pca <- function(pca_object, pca_data) {
return(pca_data %*% t(pca_object$rotation) + matrix(pca_object$center, nrow = nrow(pca_data), ncol = ncol(pca_object$rotation), byrow = TRUE))
}
avatars_original_scale <- inverse_pca(pca, avatars_pca_space)
# Assuming 'aids_data_normalized' contains the scaling attributes of the original data
# Inverse normalization (if the original data was normalized)
avatars_rescaled <- scale(avatars_original_scale, center = FALSE, scale = 1/attr(data_normalized, "scaled:scale"))
avatars_rescaled <- sweep(avatars_rescaled, 2, attr(data_normalized, "scaled:center"), "+")
avatars_tibble <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
)
}
iteration <- c(1:4)
set.seed(seed_value)
augmented_data_x <- map_dfr(iteration, data_augment_avatar, .id = "iter_")
augmented_data_x <- augmented_data_x %>%
select(-iter_)
###############
# Finally, fit the Cox model
fit <- coxph(Surv(delai_event, event) ~ haplotype +age_r + sexe_d + rejet_aigu,
data = augmented_data_x)
coefs <- fit$coefficients
hr <- exp(coefs)
return(data.frame(variable = names(hr), hr = hr))
# Calculate confidence intervals
# ci <- confint(fit)
# return(list(fit = fit, ci = ci))
}
# Generate a list of seed values
seed_value <- sample(x=100) # Modify this if you need different seed values
# Apply the algorithm with different seed values
model_results <- map(seed_value, run_model_with_seed)
# Extract HR and CI from model results
#extracted_results <- map(model_results, extract_hrs_and_cis)
# Combine results into a single data frame
combined_results <- bind_rows(model_results)
# Calculate median HR and CI for each variable
# aggregate_metrics <- combined_results %>%
# group_by(variable) %>%
# summarize(
# median_hr = median(hr),
# median_ci_lower = median(ci_lower),
# median_ci_upper = median(ci_upper)
# )
#
# aggregate_metrics
# Calculate the specified percentiles for HRs for each variable
percentile_metrics <- combined_results %>%
group_by(variable) %>%
summarize(
percentile_0 = quantile(hr, probs = 0),
percentile_5 = quantile(hr, probs = 0.05),
percentile_25 = quantile(hr, probs = 0.25),
percentile_50 = quantile(hr, probs = 0.5),
percentile_75 = quantile(hr, probs = 0.75),
percentile_95 = quantile(hr, probs = 0.95),
percentile_100 = quantile(hr, probs = 1)
) %>%
pivot_longer(-variable, names_to = "Percentile_HR", values_to = "Value_HR") %>%
mutate(Value_HR = round(Value_HR, 2))
# percentile_metrics
# datatable(percentile_metrics)
knitr::kable(percentile_metrics %>% mutate(Value_HR = round(Value_HR, 2)), "simple")
| variable | Percentile_HR | Value_HR |
|---|---|---|
| age_r | percentile_0 | 1.02 |
| age_r | percentile_5 | 1.03 |
| age_r | percentile_25 | 1.03 |
| age_r | percentile_50 | 1.04 |
| age_r | percentile_75 | 1.04 |
| age_r | percentile_95 | 1.05 |
| age_r | percentile_100 | 1.05 |
| haplotype | percentile_0 | 4.04 |
| haplotype | percentile_5 | 4.44 |
| haplotype | percentile_25 | 5.08 |
| haplotype | percentile_50 | 5.53 |
| haplotype | percentile_75 | 6.06 |
| haplotype | percentile_95 | 7.27 |
| haplotype | percentile_100 | 8.37 |
| rejet_aigu | percentile_0 | 2.43 |
| rejet_aigu | percentile_5 | 3.05 |
| rejet_aigu | percentile_25 | 3.56 |
| rejet_aigu | percentile_50 | 4.17 |
| rejet_aigu | percentile_75 | 4.71 |
| rejet_aigu | percentile_95 | 6.13 |
| rejet_aigu | percentile_100 | 7.58 |
| sexe_d | percentile_0 | 1.29 |
| sexe_d | percentile_5 | 1.90 |
| sexe_d | percentile_25 | 2.49 |
| sexe_d | percentile_50 | 2.95 |
| sexe_d | percentile_75 | 3.49 |
| sexe_d | percentile_95 | 4.35 |
| sexe_d | percentile_100 | 8.81 |
# Number of neighbors
k <- 20 # Adjust this based on your requirement
algorithm
pca_transformed_data <- pca$x
knn_result <- get.knn(pca_transformed_data, k)
generate_avatar_weights <- function(knn_result, pca_transformed_data, k) {
n <- nrow(pca_transformed_data)
avatar_weights <- matrix(nrow = n, ncol = k)
for (i in 1:n) {
# Step 1: Inverse of Distances
distances <- sqrt(rowSums((pca_transformed_data[knn_result$nn.index[i, ], ] - pca_transformed_data[i, ])^2))
inverse_distances <- 1 / distances
# Step 2: Random Weights
random_weights <- rexp(k, rate = 1)
# Step 3: Contribution Factors
shuffled_indices <- sample(k)
contribution_factors <- 1 / (2^shuffled_indices)
# Step 4: Calculate Weights
weights <- inverse_distances * random_weights * contribution_factors
# Step 5: Normalize Weights
normalized_weights <- weights / sum(weights)
avatar_weights[i, ] <- normalized_weights
}
return(avatar_weights)
}
# Generate avatar weights
set.seed(12)
avatar_weights <- generate_avatar_weights(knn_result, pca_transformed_data, k)
Generation of avatar in the latent space
# Assuming pca_result, avatar_weights, knn_result$nn.index, and pca_transformed_data are already defined
# Function to generate avatars in PCA space based on weights
generate_avatars_pca_space <- function(pca_transformed_data, knn_indices, weights) {
n <- nrow(pca_transformed_data)
avatars_pca <- matrix(nrow = n, ncol = ncol(pca_transformed_data))
for (i in 1:n) {
weighted_avatars <- pca_transformed_data[knn_indices[i, ], ] * weights[i, ]
avatars_pca[i, ] <- colSums(weighted_avatars)
}
return(avatars_pca)
}
# Generate avatars in PCA space
avatars_pca_space <- generate_avatars_pca_space(pca_transformed_data, knn_result$nn.index, avatar_weights)
Return to the initial scale
# Assuming 'aids_pca' is the PCA object and 'avatars_pca_space' contains the avatars in PCA space
# Inverse PCA transformation
inverse_pca <- function(pca_object, pca_data) {
return(pca_data %*% t(pca_object$rotation) + matrix(pca_object$center, nrow = nrow(pca_data), ncol = ncol(pca_object$rotation), byrow = TRUE))
}
avatars_original_scale <- inverse_pca(pca, avatars_pca_space)
# Assuming 'aids_data_normalized' contains the scaling attributes of the original data
# Inverse normalization (if the original data was normalized)
avatars_rescaled <- scale(avatars_original_scale, center = FALSE, scale = 1/attr(data_normalized, "scaled:scale"))
avatars_rescaled <- sweep(avatars_rescaled, 2, attr(data_normalized, "scaled:center"), "+")
Transform into tibble
avatars_tibble_knn20 <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
)
avatars_tibble_factor_knn20 <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
) %>%
mutate(haplotype = as.factor(haplotype),
cyp3A5D = as.factor(cyp3A5D),
sexe_r = as.factor(sexe_r),
sexe_d = as.factor(sexe_d),
# CYP3A4_1B = as.factor(CYP3A4_1B),
# MDR1_C1236T = as.factor(MDR1_C1236T),
# MDR1_G2677T = as.factor(MDR1_G2677T),
# MDR1_C3435T = as.factor(MDR1_C3435T),
rejet_aigu = as.factor(rejet_aigu))
Plot of the synthetic and original in the latent space
# Combine original and synthetic data for visualization
combined_data <- rbind(
original1 %>% mutate(DataType = 'Original'),
avatars_tibble_knn20 %>% mutate(DataType = 'Synthetic')
)
# Perform PCA on combined data
combined_data_normalized <- scale(combined_data[, -which(names(combined_data) %in% c("DataType", "id"))])
combined_pca <- prcomp(combined_data_normalized, scale. = FALSE)
# Extract the first two principal components
combined_pca_data <- data.frame(combined_pca$x[, 1:2])
combined_pca_data$DataType <- combined_data$DataType
# Plot PCA with color differentiation
ggplot(combined_pca_data, aes(x = PC1, y = PC2, color = DataType)) +
geom_point(alpha = 0.8) +
theme_minimal() +
labs(title = "PCA Plot", x = "Principal Component 1", y = "Principal Component 2", color = "Data Type")
## Vector of categorical variables that need transformation
catVars <- c("haplotype", "cyp3A5D", "sexe_r", "sexe_d",
"rejet_aigu", "event")
## Create a variable list.
vars <- c( "haplotype", "cyp3A5D", "age_r", "sexe_r", "age_d", "sexe_d",
"rejet_aigu", "TIF", "event", "delai_event", "DataType")
tableOne <- CreateTableOne(vars = vars, strata = "DataType",factorVars = catVars, data = combined_data)
tableOne2<-print(tableOne, nonnormal = c( "age_r", "age_d", "TIF", "delai_event"), printToggle=F, minMax=T)
| Original | Synthetic | p | test | |
|---|---|---|---|---|
| n | 253 | 253 | ||
| haplotype (%) | 0.001 | |||
| 1 | 97 (38.3) | 79 ( 31.2) | ||
| 2 | 123 (48.6) | 159 ( 62.8) | ||
| 3 | 33 (13.0) | 15 ( 5.9) | ||
| cyp3A5D = 2 (%) | 211 (83.4) | 224 ( 88.5) | 0.125 | |
| age_r (median [range]) | 55.00 [19.00, 78.00] | 55.12 [25.46, 74.57] | 0.715 | nonnorm |
| sexe_r = 2 (%) | 156 (61.7) | 178 ( 70.4) | 0.049 | |
| age_d (median [range]) | 40.00 [12.00, 73.00] | 39.04 [18.38, 67.75] | 0.643 | nonnorm |
| sexe_d = 2 (%) | 174 (68.8) | 188 ( 74.3) | 0.200 | |
| rejet_aigu = 2 (%) | 81 (32.0) | 63 ( 24.9) | 0.094 | |
| TIF (median [range]) | 1153.00 [303.00, 2580.00] | 1135.02 [630.81, 2091.07] | 0.271 | nonnorm |
| event = 1 (%) | 22 ( 8.7) | 13 ( 5.1) | 0.161 | |
| delai_event (median [range]) | 5.34 [0.68, 15.83] | 5.34 [1.20, 15.10] | 0.779 | nonnorm |
| DataType = Synthetic (%) | 0 ( 0.0) | 253 (100.0) | <0.001 |
summary(original)
## haplotype cyp3A5D age_r sexe_r age_d sexe_d
## autre: 97 Es : 42 Min. :19.00 F: 97 Min. :12.00 F: 79
## het :123 NEs:211 1st Qu.:44.00 M:156 1st Qu.:25.00 M:174
## hom : 33 Median :55.00 Median :40.00
## Mean :53.84 Mean :38.49
## 3rd Qu.:64.00 3rd Qu.:49.00
## Max. :78.00 Max. :73.00
## rejet_aigu TIF event delai_event
## 0:172 Min. : 303 Min. :0.00000 Min. : 0.680
## 1: 81 1st Qu.: 975 1st Qu.:0.00000 1st Qu.: 2.920
## Median :1153 Median :0.00000 Median : 5.340
## Mean :1199 Mean :0.08696 Mean : 6.044
## 3rd Qu.:1368 3rd Qu.:0.00000 3rd Qu.: 8.700
## Max. :2580 Max. :1.00000 Max. :15.830
summary(avatars_tibble_knn20)
## haplotype cyp3A5D age_r sexe_r
## Min. :1.000 Min. :1.000 Min. :25.46 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:46.65 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :55.12 Median :2.000
## Mean :1.747 Mean :1.885 Mean :53.70 Mean :1.704
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:61.25 3rd Qu.:2.000
## Max. :3.000 Max. :2.000 Max. :74.57 Max. :2.000
## age_d sexe_d rejet_aigu TIF
## Min. :18.38 Min. :1.000 Min. :1.000 Min. : 630.8
## 1st Qu.:30.96 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1027.6
## Median :39.04 Median :2.000 Median :1.000 Median :1135.0
## Mean :39.03 Mean :1.743 Mean :1.249 Mean :1148.5
## 3rd Qu.:46.51 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:1253.4
## Max. :67.75 Max. :2.000 Max. :2.000 Max. :2091.1
## event delai_event
## Min. :0.00000 Min. : 1.204
## 1st Qu.:0.00000 1st Qu.: 3.946
## Median :0.00000 Median : 5.344
## Mean :0.05138 Mean : 5.742
## 3rd Qu.:0.00000 3rd Qu.: 7.278
## Max. :1.00000 Max. :15.099
# boxplots
plot_boxplot(combined_data , by ="DataType")
# histograms
# Function to create histogram for each continuous variable
plot_histograms <- function(data, var_name, group_var) {
ggplot(data, aes(x = !!sym(var_name), fill = !!sym(group_var))) +
geom_histogram(alpha = 0.5,show.legend = FALSE) +
labs(x = var_name, y = "Count") +
theme_minimal() +
ggtitle(paste(var_name))
}
# Using select_if to identify continuous variables and map to apply the function
plots <- combined_data %>%
select( -sexe_r,-sexe_d) %>%
select_if(is.numeric) %>%
names() %>%
map(~plot_histograms(combined_data, ., "DataType"))
# Optionally, print or arrange plots (e.g., using gridExtra or patchwork packages)
wrap_plots(plots)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##Correlation Analysis
cor_real <- cor(original1, use = "complete.obs")
cor_synthetic <- cor(avatars_tibble_knn20, use = "complete.obs")
# plots
ggcorrplot(cor_real, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# plots
ggcorrplot(cor_synthetic, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.2112071 3.3575352 0.3463273 3.497 0.00047 ***
## cyp3A5D -1.2323909 0.2915946 0.5567303 -2.214 0.02685 *
## age_r -0.0039521 0.9960557 0.0187880 -0.210 0.83339
## sexe_r -0.0438849 0.9570641 0.4606422 -0.095 0.92410
## age_d 0.0360206 1.0366772 0.0203668 1.769 0.07696 .
## sexe_d 0.2786636 1.3213627 0.5181402 0.538 0.59070
## rejet_aigu 1.0124644 2.7523756 0.4804379 2.107 0.03508 *
## TIF -0.0002268 0.9997732 0.0005753 -0.394 0.69345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.3575 0.2978 1.70305 6.6193
## cyp3A5D 0.2916 3.4294 0.09792 0.8683
## age_r 0.9961 1.0040 0.96004 1.0334
## sexe_r 0.9571 1.0449 0.38801 2.3607
## age_d 1.0367 0.9646 0.99611 1.0789
## sexe_d 1.3214 0.7568 0.47861 3.6481
## rejet_aigu 2.7524 0.3633 1.07339 7.0576
## TIF 0.9998 1.0002 0.99865 1.0009
##
## Concordance= 0.758 (se = 0.054 )
## Likelihood ratio test= 24.6 on 8 df, p=0.002
## Wald test = 21.09 on 8 df, p=0.007
## Score (logrank) test = 25.84 on 8 df, p=0.001
ggforest(fit_original)
# synthetique
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = avatars_tibble_knn20)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = avatars_tibble_knn20)
##
## n= 253, number of events= 13
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 2.179268 8.839834 0.955361 2.281 0.0225 *
## cyp3A5D -1.619692 0.197960 0.839800 -1.929 0.0538 .
## age_r -0.004690 0.995321 0.036846 -0.127 0.8987
## sexe_r -0.465605 0.627755 0.710697 -0.655 0.5124
## age_d 0.053252 1.054696 0.046420 1.147 0.2513
## sexe_d -0.372408 0.689073 0.635743 -0.586 0.5580
## rejet_aigu 1.140204 3.127407 0.641104 1.779 0.0753 .
## TIF -0.003864 0.996143 0.001955 -1.977 0.0480 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 8.8398 0.1131 1.35906 57.497
## cyp3A5D 0.1980 5.0515 0.03817 1.027
## age_r 0.9953 1.0047 0.92598 1.070
## sexe_r 0.6278 1.5930 0.15590 2.528
## age_d 1.0547 0.9481 0.96297 1.155
## sexe_d 0.6891 1.4512 0.19821 2.396
## rejet_aigu 3.1274 0.3198 0.89018 10.987
## TIF 0.9961 1.0039 0.99233 1.000
##
## Concordance= 0.823 (se = 0.055 )
## Likelihood ratio test= 21.87 on 8 df, p=0.005
## Wald test = 16.79 on 8 df, p=0.03
## Score (logrank) test = 21.29 on 8 df, p=0.006
ggforest(fit_synthetique)
Allow to see which vairable would have been selected
Original
boot.stepAIC(fit_original, original1, B = 100, k=log(nrow(original1)))
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = original1)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 5.53 * df
##
## Covariates selected
## (%)
## haplotype 95
## rejet_aigu 47
## cyp3A5D 29
## age_d 21
## sexe_d 3
## Null 2
## sexe_r 2
## TIF 2
##
## Coefficients Sign
## + (%) - (%)
## age_d 100 0
## haplotype 100 0
## rejet_aigu 100 0
## sexe_d 100 0
## sexe_r 100 0
## cyp3A5D 0 100
## TIF 0 100
##
## Stat Significance
## (%)
## age_d 100
## cyp3A5D 100
## haplotype 100
## rejet_aigu 100
## sexe_d 100
## sexe_r 100
## TIF 100
##
##
## The stepAIC() for the original data-set gave
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype, data = original1)
##
## coef exp(coef) se(coef) z p
## haplotype 1.2035 3.3319 0.3276 3.674 0.000239
##
## Likelihood ratio test=14.01 on 1 df, p=0.0001822
## n= 253, number of events= 22
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r +
## age_d + sexe_d + rejet_aigu + TIF
##
## Final Model:
## Surv(delai_event, event) ~ haplotype
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 14 -24.59675 212.4446
## 2 - sexe_r 1 0.009067423 15 -24.58769 206.9203
## 3 - age_r 1 0.039034149 16 -24.54865 201.4259
## 4 - TIF 1 0.251427799 17 -24.29723 196.1439
## 5 - sexe_d 1 0.442126797 18 -23.85510 191.0527
## 6 - age_d 1 2.811491990 19 -21.04361 188.3308
## 7 - cyp3A5D 1 2.805921958 20 -18.23768 185.6033
## 8 - rejet_aigu 1 4.230950507 21 -14.00673 184.3009
synhtetic knn20
boot.stepAIC(fit_synthetique, avatars_tibble_knn20, B = 100, k=log(nrow(avatars_tibble_knn20)))
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = avatars_tibble_knn20)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 5.53 * df
##
## Covariates selected
## (%)
## haplotype 83
## rejet_aigu 53
## TIF 43
## cyp3A5D 34
## age_d 17
## sexe_r 7
## sexe_d 6
## age_r 5
## Null 4
##
## Coefficients Sign
## + (%) - (%)
## haplotype 100.00 0.00
## rejet_aigu 100.00 0.00
## age_d 94.12 5.88
## age_r 80.00 20.00
## sexe_d 16.67 83.33
## cyp3A5D 0.00 100.00
## sexe_r 0.00 100.00
## TIF 0.00 100.00
##
## Stat Significance
## (%)
## age_d 100.00
## age_r 100.00
## TIF 100.00
## haplotype 97.59
## cyp3A5D 97.06
## rejet_aigu 90.57
## sexe_r 85.71
## sexe_d 83.33
##
##
## The stepAIC() for the original data-set gave
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype, data = avatars_tibble_knn20)
##
## coef exp(coef) se(coef) z p
## haplotype 1.4907 4.4401 0.5537 2.692 0.0071
##
## Likelihood ratio test=7.34 on 1 df, p=0.006759
## n= 253, number of events= 13
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r +
## age_d + sexe_d + rejet_aigu + TIF
##
## Final Model:
## Surv(delai_event, event) ~ haplotype
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 5 -21.869781 142.3860
## 2 - age_r 1 0.01617331 6 -21.853608 136.8688
## 3 - sexe_d 1 0.32081312 7 -21.532795 131.6562
## 4 - sexe_r 1 0.66821937 8 -20.864576 126.7911
## 5 - age_d 1 1.78823983 9 -19.076336 123.0459
## 6 - cyp3A5D 1 2.32166936 10 -16.754666 119.8342
## 7 - TIF 1 5.21665836 11 -11.538008 119.5175
## 8 - rejet_aigu 1 4.20197314 12 -7.336035 118.1860
fit_original <- coxph(Surv(delai_event, event) ~ haplotype , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype, data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.2035 3.3319 0.3276 3.674 0.000239 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.332 0.3001 1.753 6.332
##
## Concordance= 0.682 (se = 0.044 )
## Likelihood ratio test= 14.01 on 1 df, p=2e-04
## Wald test = 13.5 on 1 df, p=2e-04
## Score (logrank) test = 14.91 on 1 df, p=1e-04
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype , data = avatars_tibble_knn20)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype, data = avatars_tibble_knn20)
##
## n= 253, number of events= 13
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.4907 4.4401 0.5537 2.692 0.0071 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 4.44 0.2252 1.5 13.14
##
## Concordance= 0.673 (se = 0.032 )
## Likelihood ratio test= 7.34 on 1 df, p=0.007
## Wald test = 7.25 on 1 df, p=0.007
## Score (logrank) test = 7.03 on 1 df, p=0.008
Allow to define the variability range of HR for a given dataset (intra dataset variability)
# Define the Cox model
cox_model <- function(data, indices) {
d <- data[indices,] # allows bootstrapping to sample the data
fit <- coxph(Surv(delai_event, event) ~ haplotype , data=d)
return(fit$coefficients)
}
# Set the seed for reproducibility
set.seed(12)
# Bootstrap the Cox model
boot_results <- boot(data=avatars_tibble_knn20, statistic=cox_model, R=100)
# Convert bootstrap results to a data frame for ggplot2
boot_hrs <- exp(boot_results$t) # Convert log(HR) to HR
hr_data <- data.frame(HR=boot_hrs[,1])
# Calculate summary statistics
summary_stats <- quantile(hr_data$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1))
names(summary_stats) <- c("Min","2.5th", "5th", "25th", "Median", "75th", "95th","97.5th","Max")
# Create the histogram
ggplot(hr_data, aes(x=HR)) +
geom_histogram(bins=30, fill="#007a86", color="black") +
# geom_vline(aes(xintercept=summary_stats["Min"]), color="red", linetype="dashed") +
geom_vline(aes(xintercept=summary_stats["25th"]), color="gray", linetype="dashed", linewidth=2) +
geom_vline(aes(xintercept=summary_stats["Median"]), color="blue", linetype="dashed", linewidth=2) +
geom_vline(aes(xintercept=summary_stats["75th"]), color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Max"]), color="purple", linetype="dashed") +
labs(title="Bootstrap Distribution of Hazard Ratios", x="Hazard Ratio (HR)", y="Frequency") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
# Print summary statistics
print(summary_stats)
## Min 2.5th 5th 25th Median 75th 95th
## 2.294047 2.553093 2.634223 3.574178 4.495083 6.306976 19.492347
## 97.5th Max
## 28.651190 101.963509
km_original <- survfit(Surv(delai_event, event) ~ haplotype, data = original)
km_original_plot <- ggsurvplot(
km_original,
data = original,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_original_plot
km_synthetique <- survfit(Surv(delai_event, event) ~ haplotype, data = avatars_tibble_knn20)
km_synthetique_avatar_20 <- ggsurvplot(
km_synthetique,
data = avatars_tibble_knn20,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique_avatar_20
Plots original & synthetic combined
## combine data
combined_df <- rbind(original %>% mutate(group = "original"), avatars_tibble_factor_knn20 %>% mutate(group = "synthetic")) %>% mutate(combined_haplotype = str_c(haplotype,"_", group ))
## fit the model
km_combined <- survfit(Surv(delai_event, event) ~ combined_haplotype, data = combined_df)
# plot
ggsurvplot(fit = km_combined,
data = combined_df,
size = 1, # change line size
conf.int = FALSE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.35, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
library(GGally)
pm_knn20 <- combined_df %>% select(haplotype:delai_event, group) %>% ggpairs(
ggplot2::aes(colour = group,alpha = 0.5),
upper = list(continuous = wrap("cor", size = 1.5)),
lower=list(combo=wrap("facethist", binwidth=0.5))) +
theme(strip.text.x = element_text(size = 5),
strip.text.y = element_text(size = 5),axis.text = element_text(size = 5))
pm_knn20
# ggsave("comparaison_distribution_knn20.pdf")
Allow to define the variability range of HR for different Avatar generated with different seed but the same knn (inter dataset variability)
# Assuming all your existing functions and necessary libraries are loaded
run_model_with_seed <- function(seed_value) {
# Number of neighbors
k <- 20 # Adjust this based on your requirement
pca_transformed_data <- pca$x
knn_result <- get.knn(pca_transformed_data, k)
generate_avatar_weights <- function(knn_result, pca_transformed_data, k) {
n <- nrow(pca_transformed_data)
avatar_weights <- matrix(nrow = n, ncol = k)
for (i in 1:n) {
# Step 1: Inverse of Distances
distances <- sqrt(rowSums((pca_transformed_data[knn_result$nn.index[i, ], ] - pca_transformed_data[i, ])^2))
inverse_distances <- 1 / distances
# Step 2: Random Weights
random_weights <- rexp(k, rate = 1)
# Step 3: Contribution Factors
shuffled_indices <- sample(k)
contribution_factors <- 1 / (2^shuffled_indices)
# Step 4: Calculate Weights
weights <- inverse_distances * random_weights * contribution_factors
# Step 5: Normalize Weights
normalized_weights <- weights / sum(weights)
avatar_weights[i, ] <- normalized_weights
}
return(avatar_weights)
}
# Generate avatar weights
set.seed(seed_value)
avatar_weights <- generate_avatar_weights(knn_result, pca_transformed_data, k)
# Function to generate avatars in PCA space based on weights
generate_avatars_pca_space <- function(pca_transformed_data, knn_indices, weights) {
n <- nrow(pca_transformed_data)
avatars_pca <- matrix(nrow = n, ncol = ncol(pca_transformed_data))
for (i in 1:n) {
weighted_avatars <- pca_transformed_data[knn_indices[i, ], ] * weights[i, ]
avatars_pca[i, ] <- colSums(weighted_avatars)
}
return(avatars_pca)
}
# Generate avatars in PCA space
avatars_pca_space <- generate_avatars_pca_space(pca_transformed_data, knn_result$nn.index, avatar_weights)
# Inverse PCA transformation
inverse_pca <- function(pca_object, pca_data) {
return(pca_data %*% t(pca_object$rotation) + matrix(pca_object$center, nrow = nrow(pca_data), ncol = ncol(pca_object$rotation), byrow = TRUE))
}
avatars_original_scale <- inverse_pca(pca, avatars_pca_space)
# Inverse normalization (if the original data was normalized)
avatars_rescaled <- scale(avatars_original_scale, center = FALSE, scale = 1/attr(data_normalized, "scaled:scale"))
avatars_rescaled <- sweep(avatars_rescaled, 2, attr(data_normalized, "scaled:center"), "+")
avatars_tibble_knn20 <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
)
avatars_tibble_factor_knn20 <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
) %>%
mutate(haplotype = as.factor(haplotype),
cyp3A5D = as.factor(cyp3A5D),
sexe_r = as.factor(sexe_r),
sexe_d = as.factor(sexe_d),
# CYP3A4_1B = as.factor(CYP3A4_1B),
# MDR1_C1236T = as.factor(MDR1_C1236T),
# MDR1_G2677T = as.factor(MDR1_G2677T),
# MDR1_C3435T = as.factor(MDR1_C3435T),
rejet_aigu = as.factor(rejet_aigu))
# Finally, fit the Cox model
fit <- coxph(Surv(delai_event, event) ~ haplotype ,
data = avatars_tibble_knn20)
# Calculate confidence intervals
ci <- confint(fit)
return(list(fit = fit, ci = ci))
}
extract_hrs_and_cis <- function(model_output) {
coefs <- model_output$fit$coefficients
ci <- model_output$ci
hr <- exp(coefs)
ci_lower <- exp(ci[,"2.5 %"])
ci_upper <- exp(ci[,"97.5 %"])
return(data.frame(variable = names(hr), hr = hr, ci_lower = ci_lower, ci_upper = ci_upper))
}
# Generate a list of seed values
seed_values <- sample(x=100) # Modify this if you need different seed values
# Apply the algorithm with different seed values
model_results <- map(seed_values, run_model_with_seed)
# Extract HR and CI from model results
extracted_results <- map(model_results, extract_hrs_and_cis)
# Combine results into a single data frame
combined_results <- bind_rows(extracted_results)
# Calculate median HR and CI for each variable
# aggregate_metrics <- combined_results %>%
# group_by(variable) %>%
# summarize(
# median_hr = median(hr),
# median_ci_lower = median(ci_lower),
# median_ci_upper = median(ci_upper)
# )
#
# aggregate_metrics
# Calculate the specified percentiles for HRs for each variable
percentile_metrics <- combined_results %>%
group_by(variable) %>%
summarize(
percentile_0 = quantile(hr, probs = 0),
percentile_5 = quantile(hr, probs = 0.05),
percentile_25 = quantile(hr, probs = 0.25),
percentile_50 = quantile(hr, probs = 0.5),
percentile_75 = quantile(hr, probs = 0.75),
percentile_95 = quantile(hr, probs = 0.95),
percentile_100 = quantile(hr, probs = 1)
) %>%
pivot_longer(-variable, names_to = "Percentile_HR", values_to = "Value_HR") %>%
mutate(Value_HR = round(Value_HR, 2))
# percentile_metrics
# datatable(percentile_metrics)
knitr::kable(percentile_metrics %>% mutate(Value_HR = round(Value_HR, 2)), "simple")
| variable | Percentile_HR | Value_HR |
|---|---|---|
| haplotype | percentile_0 | 2.78 |
| haplotype | percentile_5 | 3.49 |
| haplotype | percentile_25 | 4.35 |
| haplotype | percentile_50 | 5.98 |
| haplotype | percentile_75 | 8.04 |
| haplotype | percentile_95 | 12.90 |
| haplotype | percentile_100 | 25.90 |
We investigate the effect of data augmentaiotn with a defined seed and knn=20
data_augment_avatar <- function(x) {
data_normalized <- scale(original1)
pca <- prcomp(data_normalized, scale. = FALSE)# pour selecitonner le nombre de cp rank. = 3
# Number of neighbors
k <- 20 # Adjust this based on your requirement
pca_transformed_data <- pca$x
knn_result <- get.knn(pca_transformed_data, k)
generate_avatar_weights <- function(knn_result, pca_transformed_data, k) {
n <- nrow(pca_transformed_data)
avatar_weights <- matrix(nrow = n, ncol = k)
for (i in 1:n) {
# Step 1: Inverse of Distances
distances <- sqrt(rowSums((pca_transformed_data[knn_result$nn.index[i, ], ] - pca_transformed_data[i, ])^2))
inverse_distances <- 1 / distances
# Step 2: Random Weights
random_weights <- rexp(k, rate = 1)
# Step 3: Contribution Factors
shuffled_indices <- sample(k)
contribution_factors <- 1 / (2^shuffled_indices)
# Step 4: Calculate Weights
weights <- inverse_distances * random_weights * contribution_factors
# Step 5: Normalize Weights
normalized_weights <- weights / sum(weights)
avatar_weights[i, ] <- normalized_weights
}
return(avatar_weights)
}
# Generate avatar weights
set.seed( str_c(1,x))
avatar_weights <- generate_avatar_weights(knn_result, pca_transformed_data, k)
# Assuming pca_result, avatar_weights, knn_result$nn.index, and pca_transformed_data are already defined
# Function to generate avatars in PCA space based on weights
generate_avatars_pca_space <- function(pca_transformed_data, knn_indices, weights) {
n <- nrow(pca_transformed_data)
avatars_pca <- matrix(nrow = n, ncol = ncol(pca_transformed_data))
for (i in 1:n) {
weighted_avatars <- pca_transformed_data[knn_indices[i, ], ] * weights[i, ]
avatars_pca[i, ] <- colSums(weighted_avatars)
}
return(avatars_pca)
}
# Generate avatars in PCA space
avatars_pca_space <- generate_avatars_pca_space(pca_transformed_data, knn_result$nn.index, avatar_weights)
# Assuming 'aids_pca' is the PCA object and 'avatars_pca_space' contains the avatars in PCA space
# Inverse PCA transformation
inverse_pca <- function(pca_object, pca_data) {
return(pca_data %*% t(pca_object$rotation) + matrix(pca_object$center, nrow = nrow(pca_data), ncol = ncol(pca_object$rotation), byrow = TRUE))
}
avatars_original_scale <- inverse_pca(pca, avatars_pca_space)
# Assuming 'aids_data_normalized' contains the scaling attributes of the original data
# Inverse normalization (if the original data was normalized)
avatars_rescaled <- scale(avatars_original_scale, center = FALSE, scale = 1/attr(data_normalized, "scaled:scale"))
avatars_rescaled <- sweep(avatars_rescaled, 2, attr(data_normalized, "scaled:center"), "+")
avatars_tibble <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
)
}
iteration <- c(1:4)
augmented_data_20 <- map_dfr(iteration, data_augment_avatar, .id = "iter_")
augmented_data_20 <- augmented_data_20 %>%
select(-iter_)
augmented_data_20_factor_knn20 <- augmented_data_20 %>%
mutate(haplotype = as.factor(haplotype),
cyp3A5D = as.factor(cyp3A5D),
sexe_r = as.factor(sexe_r),
sexe_d = as.factor(sexe_d),
# CYP3A4_1B = as.factor(CYP3A4_1B),
# MDR1_C1236T = as.factor(MDR1_C1236T),
# MDR1_G2677T = as.factor(MDR1_G2677T),
# MDR1_C3435T = as.factor(MDR1_C3435T),
rejet_aigu = as.factor(rejet_aigu))
Plot of the synthetic and original in the latent space
# Combine original and synthetic data for visualization
combined_data <- rbind(
original1 %>% mutate(DataType = 'Original'),
augmented_data_20 %>% mutate(DataType = 'Synthetic')
)
# Perform PCA on combined data
combined_data_normalized <- scale(combined_data[, -which(names(combined_data) %in% c("DataType", "id"))])
combined_pca <- prcomp(combined_data_normalized, scale. = FALSE)
# Extract the first two principal components
combined_pca_data <- data.frame(combined_pca$x[, 1:2])
combined_pca_data$DataType <- combined_data$DataType
# Plot PCA with color differentiation
ggplot(combined_pca_data, aes(x = PC1, y = PC2, color = DataType)) +
geom_point(alpha = 0.5) +
theme_minimal() +
labs(title = "PCA Plot", x = "Principal Component 1", y = "Principal Component 2", color = "Data Type")
Export data augmented knn = 20
write_csv(augmented_data_20, file = "avatar_sfpt_knn20_data_augmented.csv")
## Vector of categorical variables that need transformation
catVars <- c("haplotype", "cyp3A5D", "sexe_r", "sexe_d",
"rejet_aigu", "event")
## Create a variable list.
vars <- c( "haplotype", "cyp3A5D", "age_r", "sexe_r", "age_d", "sexe_d",
"rejet_aigu", "TIF", "event", "delai_event", "DataType")
tableOne <- CreateTableOne(vars = vars, strata = "DataType",factorVars = catVars, data = combined_data)
tableOne2<-print(tableOne, nonnormal = c( "age_r", "age_d", "TIF", "delai_event"), printToggle=F, minMax=T)
| Original | Synthetic | p | test | |
|---|---|---|---|---|
| n | 253 | 1012 | ||
| haplotype (%) | <0.001 | |||
| 1 | 97 (38.3) | 345 ( 34.1) | ||
| 2 | 123 (48.6) | 614 ( 60.7) | ||
| 3 | 33 (13.0) | 53 ( 5.2) | ||
| cyp3A5D = 2 (%) | 211 (83.4) | 909 ( 89.8) | 0.006 | |
| age_r (median [range]) | 55.00 [19.00, 78.00] | 55.60 [24.90, 75.77] | 0.755 | nonnorm |
| sexe_r = 2 (%) | 156 (61.7) | 700 ( 69.2) | 0.027 | |
| age_d (median [range]) | 40.00 [12.00, 73.00] | 39.73 [16.63, 67.75] | 0.479 | nonnorm |
| sexe_d = 2 (%) | 174 (68.8) | 743 ( 73.4) | 0.161 | |
| rejet_aigu = 2 (%) | 81 (32.0) | 244 ( 24.1) | 0.013 | |
| TIF (median [range]) | 1153.00 [303.00, 2580.00] | 1144.15 [576.99, 2091.07] | 0.516 | nonnorm |
| event = 1 (%) | 22 ( 8.7) | 55 ( 5.4) | 0.073 | |
| delai_event (median [range]) | 5.34 [0.68, 15.83] | 5.21 [1.02, 15.10] | 0.989 | nonnorm |
| DataType = Synthetic (%) | 0 ( 0.0) | 1012 (100.0) | <0.001 |
summary(original)
## haplotype cyp3A5D age_r sexe_r age_d sexe_d
## autre: 97 Es : 42 Min. :19.00 F: 97 Min. :12.00 F: 79
## het :123 NEs:211 1st Qu.:44.00 M:156 1st Qu.:25.00 M:174
## hom : 33 Median :55.00 Median :40.00
## Mean :53.84 Mean :38.49
## 3rd Qu.:64.00 3rd Qu.:49.00
## Max. :78.00 Max. :73.00
## rejet_aigu TIF event delai_event
## 0:172 Min. : 303 Min. :0.00000 Min. : 0.680
## 1: 81 1st Qu.: 975 1st Qu.:0.00000 1st Qu.: 2.920
## Median :1153 Median :0.00000 Median : 5.340
## Mean :1199 Mean :0.08696 Mean : 6.044
## 3rd Qu.:1368 3rd Qu.:0.00000 3rd Qu.: 8.700
## Max. :2580 Max. :1.00000 Max. :15.830
summary(augmented_data_20)
## haplotype cyp3A5D age_r sexe_r
## Min. :1.000 Min. :1.000 Min. :24.90 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:46.31 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :55.60 Median :2.000
## Mean :1.711 Mean :1.898 Mean :53.80 Mean :1.692
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:61.59 3rd Qu.:2.000
## Max. :3.000 Max. :2.000 Max. :75.77 Max. :2.000
## age_d sexe_d rejet_aigu TIF
## Min. :16.63 Min. :1.000 Min. :1.000 Min. : 577
## 1st Qu.:30.92 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1030
## Median :39.73 Median :2.000 Median :1.000 Median :1144
## Mean :39.12 Mean :1.734 Mean :1.241 Mean :1163
## 3rd Qu.:47.08 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:1270
## Max. :67.75 Max. :2.000 Max. :2.000 Max. :2091
## event delai_event
## Min. :0.00000 Min. : 1.024
## 1st Qu.:0.00000 1st Qu.: 3.758
## Median :0.00000 Median : 5.211
## Mean :0.05435 Mean : 5.675
## 3rd Qu.:0.00000 3rd Qu.: 7.106
## Max. :1.00000 Max. :15.099
# boxplots
plot_boxplot(combined_data , by ="DataType")
# histograms
# Function to create histogram for each continuous variable
plot_histograms <- function(data, var_name, group_var) {
ggplot(data, aes(x = !!sym(var_name), fill = !!sym(group_var))) +
geom_histogram(alpha = 0.5,show.legend = FALSE) +
labs(x = var_name, y = "Count") +
theme_minimal() +
ggtitle(paste(var_name))
}
# Using select_if to identify continuous variables and map to apply the function
plots <- combined_data %>%
select( -sexe_r,-sexe_d) %>%
select_if(is.numeric) %>%
names() %>%
map(~plot_histograms(combined_data, ., "DataType"))
# Optionally, print or arrange plots (e.g., using gridExtra or patchwork packages)
wrap_plots(plots)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##Correlation Analysis
cor_real <- cor(original1, use = "complete.obs")
cor_synthetic <- cor(augmented_data_20, use = "complete.obs")
# plots
ggcorrplot(cor_real, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# plots
ggcorrplot(cor_synthetic, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.2112071 3.3575352 0.3463273 3.497 0.00047 ***
## cyp3A5D -1.2323909 0.2915946 0.5567303 -2.214 0.02685 *
## age_r -0.0039521 0.9960557 0.0187880 -0.210 0.83339
## sexe_r -0.0438849 0.9570641 0.4606422 -0.095 0.92410
## age_d 0.0360206 1.0366772 0.0203668 1.769 0.07696 .
## sexe_d 0.2786636 1.3213627 0.5181402 0.538 0.59070
## rejet_aigu 1.0124644 2.7523756 0.4804379 2.107 0.03508 *
## TIF -0.0002268 0.9997732 0.0005753 -0.394 0.69345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.3575 0.2978 1.70305 6.6193
## cyp3A5D 0.2916 3.4294 0.09792 0.8683
## age_r 0.9961 1.0040 0.96004 1.0334
## sexe_r 0.9571 1.0449 0.38801 2.3607
## age_d 1.0367 0.9646 0.99611 1.0789
## sexe_d 1.3214 0.7568 0.47861 3.6481
## rejet_aigu 2.7524 0.3633 1.07339 7.0576
## TIF 0.9998 1.0002 0.99865 1.0009
##
## Concordance= 0.758 (se = 0.054 )
## Likelihood ratio test= 24.6 on 8 df, p=0.002
## Wald test = 21.09 on 8 df, p=0.007
## Score (logrank) test = 25.84 on 8 df, p=0.001
ggforest(fit_original)
# synthetique
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = augmented_data_20)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = augmented_data_20)
##
## n= 1012, number of events= 55
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.1558853 3.1768346 0.2802950 4.124 3.73e-05 ***
## cyp3A5D -1.3490551 0.2594853 0.3808095 -3.543 0.000396 ***
## age_r 0.0192921 1.0194794 0.0167408 1.152 0.249157
## sexe_r 0.2466270 1.2797016 0.3022308 0.816 0.414488
## age_d 0.0318037 1.0323149 0.0191358 1.662 0.096513 .
## sexe_d -0.4167056 0.6592149 0.3077915 -1.354 0.175782
## rejet_aigu 0.9525651 2.5923507 0.2908215 3.275 0.001055 **
## TIF -0.0002825 0.9997175 0.0006848 -0.413 0.679904
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.1768 0.3148 1.8340 5.5028
## cyp3A5D 0.2595 3.8538 0.1230 0.5473
## age_r 1.0195 0.9809 0.9866 1.0535
## sexe_r 1.2797 0.7814 0.7077 2.3140
## age_d 1.0323 0.9687 0.9943 1.0718
## sexe_d 0.6592 1.5170 0.3606 1.2051
## rejet_aigu 2.5924 0.3858 1.4660 4.5840
## TIF 0.9997 1.0003 0.9984 1.0011
##
## Concordance= 0.76 (se = 0.036 )
## Likelihood ratio test= 57.97 on 8 df, p=1e-09
## Wald test = 60.58 on 8 df, p=4e-10
## Score (logrank) test = 62.39 on 8 df, p=2e-10
ggforest(fit_synthetique)
BootstepAIC augmented synthetic knn20
boot.stepAIC(fit_synthetique, augmented_data_20, B = 100, k=log(nrow(augmented_data_20)))
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = augmented_data_20)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 6.92 * df
##
## Covariates selected
## (%)
## haplotype 100
## rejet_aigu 72
## cyp3A5D 66
## age_d 39
## age_r 20
## sexe_d 13
## TIF 9
## sexe_r 6
##
## Coefficients Sign
## + (%) - (%)
## age_d 100.00 0.00
## age_r 100.00 0.00
## haplotype 100.00 0.00
## rejet_aigu 100.00 0.00
## sexe_r 100.00 0.00
## TIF 11.11 88.89
## cyp3A5D 0.00 100.00
## sexe_d 0.00 100.00
##
## Stat Significance
## (%)
## age_d 100
## age_r 100
## cyp3A5D 100
## haplotype 100
## rejet_aigu 100
## sexe_d 100
## sexe_r 100
## TIF 100
##
##
## The stepAIC() for the original data-set gave
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_d + rejet_aigu, data = augmented_data_20)
##
## coef exp(coef) se(coef) z p
## haplotype 1.20801 3.34682 0.26892 4.492 7.05e-06
## cyp3A5D -1.35592 0.25771 0.37927 -3.575 0.00035
## age_d 0.04692 1.04804 0.01769 2.652 0.00800
## rejet_aigu 0.87931 2.40925 0.28225 3.115 0.00184
##
## Likelihood ratio test=52.78 on 4 df, p=9.484e-11
## n= 1012, number of events= 55
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r +
## age_d + sexe_d + rejet_aigu + TIF
##
## Final Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_d + rejet_aigu
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 47 -57.97016 631.2875
## 2 - TIF 1 0.1732023 48 -57.79696 624.5410
## 3 - sexe_r 1 0.6650970 49 -57.13186 618.2864
## 4 - age_r 1 0.9729519 50 -56.15891 612.3397
## 5 - sexe_d 1 3.3809730 51 -52.77794 608.8010
Final model original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + rejet_aigu , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + rejet_aigu,
## data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.1681 3.2160 0.3261 3.582 0.000341 ***
## rejet_aigu 0.9238 2.5188 0.4661 1.982 0.047482 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.216 0.3109 1.697 6.094
## rejet_aigu 2.519 0.3970 1.010 6.280
##
## Concordance= 0.732 (se = 0.05 )
## Likelihood ratio test= 18.24 on 2 df, p=1e-04
## Wald test = 17.44 on 2 df, p=2e-04
## Score (logrank) test = 19.29 on 2 df, p=6e-05
Final model synthetic
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_d + rejet_aigu, data = augmented_data_20)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_d + rejet_aigu, data = augmented_data_20)
##
## n= 1012, number of events= 55
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.20801 3.34682 0.26892 4.492 7.05e-06 ***
## cyp3A5D -1.35592 0.25771 0.37927 -3.575 0.00035 ***
## age_d 0.04692 1.04804 0.01769 2.652 0.00800 **
## rejet_aigu 0.87931 2.40925 0.28225 3.115 0.00184 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.3468 0.2988 1.9757 5.669
## cyp3A5D 0.2577 3.8803 0.1225 0.542
## age_d 1.0480 0.9542 1.0123 1.085
## rejet_aigu 2.4092 0.4151 1.3856 4.189
##
## Concordance= 0.747 (se = 0.035 )
## Likelihood ratio test= 52.78 on 4 df, p=9e-11
## Wald test = 51.35 on 4 df, p=2e-10
## Score (logrank) test = 54.37 on 4 df, p=4e-11
km_original <- survfit(Surv(delai_event, event) ~ haplotype, data = original)
ggsurvplot(
km_original,
data = original,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique <- survfit(Surv(delai_event, event) ~ haplotype, data = augmented_data_20_factor_knn20)
km_synthetique_avatar_20_augmented <- ggsurvplot(
km_synthetique,
data = augmented_data_20_factor_knn20,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique_avatar_20_augmented
Plots original & synthetic combined
## combine data
combined_df <- rbind(original %>% mutate(group = "original"), augmented_data_20_factor_knn20 %>% mutate(group = "synthetic")) %>% mutate(combined_haplotype = str_c(haplotype,"_", group ))
## fit the model
km_combined <- survfit(Surv(delai_event, event) ~ combined_haplotype, data = combined_df)
# plot
ggsurvplot(fit = km_combined,
data = combined_df,
size = 1, # change line size
conf.int = FALSE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.35, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
Graphical exploraiotn of distribution
library(GGally)
pm_knn20_augmented <- combined_df %>% select(haplotype:delai_event, group) %>% ggpairs(
ggplot2::aes(colour = group,alpha = 0.5),
upper = list(continuous = wrap("cor", size = 1.5)),
lower=list(combo=wrap("facethist", binwidth=0.5))) +
theme(strip.text.x = element_text(size = 5),
strip.text.y = element_text(size = 5),axis.text = element_text(size = 5))
pm_knn20_augmented
# ggsave("comparaison_distribution_augmented_knn20.pdf")
Allow to define the variability range of HR for a given dataset (intra dataset variability)
# Define the Cox model
cox_model <- function(data, indices) {
d <- data[indices,] # allows bootstrapping to sample the data
fit <- coxph(Surv(delai_event, event) ~ haplotype + age_d + cyp3A5D + rejet_aigu , data=d)
return(fit$coefficients)
}
# Set the seed for reproducibility
set.seed(12)
# Bootstrap the Cox model
boot_results <- boot(data=augmented_data_20, statistic=cox_model, R=100)
# Convert bootstrap results to a data frame for ggplot2
boot_hrs <- exp(boot_results$t) # Convert log(HR) to HR
hr_data_haplo <- data.frame(HR=boot_hrs[,1])
hr_data_age_d <- data.frame(HR=boot_hrs[,2])
hr_data_cyp3A5D <- data.frame(HR=boot_hrs[,3])
hr_data_rejet_aigu <- data.frame(HR=boot_hrs[,4])
# Calculate summary statistics
summary_stats <- quantile(hr_data_haplo$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) %>%
bind_rows(quantile(hr_data_age_d$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) ) %>%
bind_rows(quantile(hr_data_cyp3A5D$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) ) %>%
bind_rows(quantile(hr_data_rejet_aigu$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) )
names(summary_stats) <- c("Min","2.5th", "5th", "25th", "Median", "75th", "95th","97.5th","Max")
# Create the histogram
ggplot(hr_data_haplo, aes(x=HR)) +
geom_histogram(bins=30, fill="#007a86", color="black") +
# # geom_vline(aes(xintercept=summary_stats["Min"]), color="red", linetype="dashed") +
# geom_vline(aes(xintercept=summary_stats["25th"][[1]][[1]]), color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Median"][[1]][[1]]), color="blue", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["75th"])[[1]][[1]], color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Max"]), color="purple", linetype="dashed") +
labs(title="Bootstrap Distribution of Hazard Ratios", x="Hazard Ratio (HR)", y="Frequency") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
# Print summary statistics
knitr::kable(summary_stats, "simple")
| Min | 2.5th | 5th | 25th | Median | 75th | 95th | 97.5th | Max |
|---|---|---|---|---|---|---|---|---|
| 2.2104216 | 2.4173764 | 2.5090651 | 2.9659151 | 3.372666 | 3.8308308 | 4.8235844 | 5.447514 | 6.068198 |
| 1.0024396 | 1.0218809 | 1.0260181 | 1.0405190 | 1.051443 | 1.0613370 | 1.0785523 | 1.082279 | 1.096752 |
| 0.1185243 | 0.1262952 | 0.1374447 | 0.1961807 | 0.263547 | 0.3461597 | 0.6474621 | 0.761141 | 1.247091 |
| 1.2164271 | 1.4634958 | 1.5824677 | 1.9565206 | 2.242277 | 2.6623821 | 4.4381318 | 4.712314 | 7.128857 |
Allow to define the inter variability range of HR for augmented knn=20 (inter dataset variability) by using 100 bootraps
# Assuming all your existing functions and necessary libraries are loaded
run_model_with_seed <- function(seed_value) {
# augmentaiotn of data
data_augment_avatar <- function(x) {
data_normalized <- scale(original1)
pca <- prcomp(data_normalized, scale. = FALSE)# pour selecitonner le nombre de cp rank. = 3
# Number of neighbors
k <- 20 # Adjust this based on your requirement
pca_transformed_data <- pca$x
knn_result <- get.knn(pca_transformed_data, k)
generate_avatar_weights <- function(knn_result, pca_transformed_data, k) {
n <- nrow(pca_transformed_data)
avatar_weights <- matrix(nrow = n, ncol = k)
for (i in 1:n) {
# Step 1: Inverse of Distances
distances <- sqrt(rowSums((pca_transformed_data[knn_result$nn.index[i, ], ] - pca_transformed_data[i, ])^2))
inverse_distances <- 1 / distances
# Step 2: Random Weights
random_weights <- rexp(k, rate = 1)
# Step 3: Contribution Factors
shuffled_indices <- sample(k)
contribution_factors <- 1 / (2^shuffled_indices)
# Step 4: Calculate Weights
weights <- inverse_distances * random_weights * contribution_factors
# Step 5: Normalize Weights
normalized_weights <- weights / sum(weights)
avatar_weights[i, ] <- normalized_weights
}
return(avatar_weights)
}
# Generate avatar weights
avatar_weights <- generate_avatar_weights(knn_result, pca_transformed_data, k)
# Assuming pca_result, avatar_weights, knn_result$nn.index, and pca_transformed_data are already defined
# Function to generate avatars in PCA space based on weights
generate_avatars_pca_space <- function(pca_transformed_data, knn_indices, weights) {
n <- nrow(pca_transformed_data)
avatars_pca <- matrix(nrow = n, ncol = ncol(pca_transformed_data))
for (i in 1:n) {
weighted_avatars <- pca_transformed_data[knn_indices[i, ], ] * weights[i, ]
avatars_pca[i, ] <- colSums(weighted_avatars)
}
return(avatars_pca)
}
# Generate avatars in PCA space
avatars_pca_space <- generate_avatars_pca_space(pca_transformed_data, knn_result$nn.index, avatar_weights)
# Assuming 'aids_pca' is the PCA object and 'avatars_pca_space' contains the avatars in PCA space
# Inverse PCA transformation
inverse_pca <- function(pca_object, pca_data) {
return(pca_data %*% t(pca_object$rotation) + matrix(pca_object$center, nrow = nrow(pca_data), ncol = ncol(pca_object$rotation), byrow = TRUE))
}
avatars_original_scale <- inverse_pca(pca, avatars_pca_space)
# Assuming 'aids_data_normalized' contains the scaling attributes of the original data
# Inverse normalization (if the original data was normalized)
avatars_rescaled <- scale(avatars_original_scale, center = FALSE, scale = 1/attr(data_normalized, "scaled:scale"))
avatars_rescaled <- sweep(avatars_rescaled, 2, attr(data_normalized, "scaled:center"), "+")
avatars_tibble <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
)
}
iteration <- c(1:4)
set.seed(seed_value)
augmented_data_x <- map_dfr(iteration, data_augment_avatar, .id = "iter_")
augmented_data_x <- augmented_data_x %>%
select(-iter_)
###############
# Finally, fit the Cox model
fit <- coxph(Surv(delai_event, event) ~ haplotype + age_d + cyp3A5D + rejet_aigu,
data = augmented_data_x)
coefs <- fit$coefficients
hr <- exp(coefs)
return(data.frame(variable = names(hr), hr = hr))
# Calculate confidence intervals
# ci <- confint(fit)
# return(list(fit = fit, ci = ci))
}
# Generate a list of seed values
seed_value <- sample(x=100) # Modify this if you need different seed values
# Apply the algorithm with different seed values
model_results <- map(seed_value, run_model_with_seed)
# Extract HR and CI from model results
#extracted_results <- map(model_results, extract_hrs_and_cis)
# Combine results into a single data frame
combined_results <- bind_rows(model_results)
# Calculate median HR and CI for each variable
# aggregate_metrics <- combined_results %>%
# group_by(variable) %>%
# summarize(
# median_hr = median(hr),
# median_ci_lower = median(ci_lower),
# median_ci_upper = median(ci_upper)
# )
#
# aggregate_metrics
# Calculate the specified percentiles for HRs for each variable
percentile_metrics <- combined_results %>%
group_by(variable) %>%
summarize(
percentile_0 = quantile(hr, probs = 0),
percentile_5 = quantile(hr, probs = 0.05),
percentile_25 = quantile(hr, probs = 0.25),
percentile_50 = quantile(hr, probs = 0.5),
percentile_75 = quantile(hr, probs = 0.75),
percentile_95 = quantile(hr, probs = 0.95),
percentile_100 = quantile(hr, probs = 1)
) %>%
pivot_longer(-variable, names_to = "Percentile_HR", values_to = "Value_HR") %>%
mutate(Value_HR = round(Value_HR, 2))
# percentile_metrics
# datatable(percentile_metrics)
knitr::kable(percentile_metrics %>% mutate(Value_HR = round(Value_HR, 2)), "simple")
| variable | Percentile_HR | Value_HR |
|---|---|---|
| age_d | percentile_0 | 1.00 |
| age_d | percentile_5 | 1.02 |
| age_d | percentile_25 | 1.04 |
| age_d | percentile_50 | 1.05 |
| age_d | percentile_75 | 1.06 |
| age_d | percentile_95 | 1.08 |
| age_d | percentile_100 | 1.09 |
| cyp3A5D | percentile_0 | 0.14 |
| cyp3A5D | percentile_5 | 0.19 |
| cyp3A5D | percentile_25 | 0.28 |
| cyp3A5D | percentile_50 | 0.35 |
| cyp3A5D | percentile_75 | 0.48 |
| cyp3A5D | percentile_95 | 0.97 |
| cyp3A5D | percentile_100 | 2.63 |
| haplotype | percentile_0 | 3.32 |
| haplotype | percentile_5 | 3.58 |
| haplotype | percentile_25 | 4.43 |
| haplotype | percentile_50 | 5.30 |
| haplotype | percentile_75 | 5.91 |
| haplotype | percentile_95 | 7.25 |
| haplotype | percentile_100 | 9.70 |
| rejet_aigu | percentile_0 | 1.73 |
| rejet_aigu | percentile_5 | 2.11 |
| rejet_aigu | percentile_25 | 2.80 |
| rejet_aigu | percentile_50 | 3.59 |
| rejet_aigu | percentile_75 | 4.52 |
| rejet_aigu | percentile_95 | 5.83 |
| rejet_aigu | percentile_100 | 8.85 |
Allow to define the variability range of HR for different Avatar generated with different seed and different values of knn
# library(tidyverse)
#
# # Assuming all your existing functions and necessary libraries are loaded
# run_for_k_values <- function(k) {
# run_model_with_seed <- function(seed_value) {
#
#
# # augmentaiotn of data
#
# data_augment_avatar <- function(x) {
# data_normalized <- scale(original1)
# pca <- prcomp(data_normalized, scale. = FALSE)# pour selecitonner le nombre de cp rank. = 3
# # Number of neighbors
# #k <- 20 # Adjust this based on your requirement
# pca_transformed_data <- pca$x
# knn_result <- get.knn(pca_transformed_data, k)
#
# generate_avatar_weights <- function(knn_result, pca_transformed_data, k) {
# n <- nrow(pca_transformed_data)
# avatar_weights <- matrix(nrow = n, ncol = k)
#
# for (i in 1:n) {
# # Step 1: Inverse of Distances
# distances <- sqrt(rowSums((pca_transformed_data[knn_result$nn.index[i, ], ] - pca_transformed_data[i, ])^2))
# inverse_distances <- 1 / distances
#
# # Step 2: Random Weights
#
# random_weights <- rexp(k, rate = 1)
#
# # Step 3: Contribution Factors
#
# shuffled_indices <- sample(k)
# contribution_factors <- 1 / (2^shuffled_indices)
#
# # Step 4: Calculate Weights
# weights <- inverse_distances * random_weights * contribution_factors
#
# # Step 5: Normalize Weights
# normalized_weights <- weights / sum(weights)
#
# avatar_weights[i, ] <- normalized_weights
# }
#
# return(avatar_weights)
# }
#
#
#
# # Generate avatar weights
#
# avatar_weights <- generate_avatar_weights(knn_result, pca_transformed_data, k)
#
# # Assuming pca_result, avatar_weights, knn_result$nn.index, and pca_transformed_data are already defined
#
# # Function to generate avatars in PCA space based on weights
# generate_avatars_pca_space <- function(pca_transformed_data, knn_indices, weights) {
# n <- nrow(pca_transformed_data)
# avatars_pca <- matrix(nrow = n, ncol = ncol(pca_transformed_data))
#
# for (i in 1:n) {
# weighted_avatars <- pca_transformed_data[knn_indices[i, ], ] * weights[i, ]
# avatars_pca[i, ] <- colSums(weighted_avatars)
# }
#
# return(avatars_pca)
# }
# # Generate avatars in PCA space
# avatars_pca_space <- generate_avatars_pca_space(pca_transformed_data, knn_result$nn.index, avatar_weights)
# # Assuming 'aids_pca' is the PCA object and 'avatars_pca_space' contains the avatars in PCA space
# # Inverse PCA transformation
# inverse_pca <- function(pca_object, pca_data) {
# return(pca_data %*% t(pca_object$rotation) + matrix(pca_object$center, nrow = nrow(pca_data), ncol = ncol(pca_object$rotation), byrow = TRUE))
# }
# avatars_original_scale <- inverse_pca(pca, avatars_pca_space)
#
# # Assuming 'aids_data_normalized' contains the scaling attributes of the original data
# # Inverse normalization (if the original data was normalized)
# avatars_rescaled <- scale(avatars_original_scale, center = FALSE, scale = 1/attr(data_normalized, "scaled:scale"))
# avatars_rescaled <- sweep(avatars_rescaled, 2, attr(data_normalized, "scaled:center"), "+")
#
# avatars_tibble <- as_tibble(avatars_rescaled) %>%
# mutate(haplotype = round(haplotype, digits=0),
# cyp3A5D = round(cyp3A5D, digits=0),
# sexe_r = round(sexe_r , digits=0),
# sexe_d = round(sexe_d , digits=0),
# rejet_aigu = round(rejet_aigu , digits=0),
# event = round(event, digits=0)
# # CYP3A4_1B = round(CYP3A4_1B, digits=0),
# # MDR1_C1236T = round(MDR1_C1236T, digits=0),
# # MDR1_G2677T = round(MDR1_G2677T, digits=0),
# # MDR1_C3435T = round(MDR1_C3435T, digits=0)
# )
# }
#
# iteration <- c(1:4)
# # set.seed(seed_value)
# set.seed(seed_value)
# augmented_data_x <- map_dfr(iteration, data_augment_avatar, .id = "iter_")
#
# augmented_data_x <- augmented_data_x %>%
# select(-iter_)
#
#
# ###############
# # Finally, fit the Cox model
# fit <- coxph(Surv(delai_event, event) ~ haplotype ,
# data = augmented_data_x)
# coefs <- fit$coefficients
# hr <- exp(coefs)
# return(data.frame(variable = names(hr), hr = hr))
# # Calculate confidence intervals
# # ci <- confint(fit)
#
# # return(list(fit = fit, ci = ci))
# }
#
#
#
#
# # Generate a list of seed values
# seed_value <- sample(x=100) # Modify this if you need different seed values
#
# # Apply the algorithm with different seed values
# model_results <- map(seed_value, run_model_with_seed)
#
# # Extract HR and CI from model results
# #extracted_results <- map(model_results, extract_hrs_and_cis)
#
# # Combine results into a single data frame
# combined_results <- bind_rows(model_results)
#
# # Calculate median HR and CI for each variable
# # aggregate_metrics <- combined_results %>%
# # group_by(variable) %>%
# # summarize(
# # median_hr = median(hr),
# # median_ci_lower = median(ci_lower),
# # median_ci_upper = median(ci_upper)
# # )
# #
# # aggregate_metrics
#
# # Calculate the specified percentiles for HRs for each variable
# percentile_metrics <- combined_results %>%
# group_by(variable) %>%
# summarize(
# percentile_0 = quantile(hr, probs = 0),
# percentile_5 = quantile(hr, probs = 0.05),
# percentile_25 = quantile(hr, probs = 0.25),
# percentile_50 = quantile(hr, probs = 0.5),
# percentile_75 = quantile(hr, probs = 0.75),
# percentile_95 = quantile(hr, probs = 0.95),
# percentile_100 = quantile(hr, probs = 1)
# ) %>%
# pivot_longer(-variable, names_to = "Percentile_HR", values_to = "Value_HR") %>%
# mutate(Value_HR = round(Value_HR, 2))
# # percentile_metrics
#
# return(percentile_metrics)
# }
# # Define different k values
# k_values <- c(3, 5, 10, 15, 20, 50)
#
# results_list <- map(k_values, run_for_k_values)
# names(results_list) <- paste("K =", k_values)
#
#
# # datatable(percentile_metrics)
# knitr::kable(results_list, "simple")
# Number of neighbors
k <- 10 # Adjust this based on your requirement
algorithm
pca_transformed_data <- pca$x
knn_result <- get.knn(pca_transformed_data, k)
generate_avatar_weights <- function(knn_result, pca_transformed_data, k) {
n <- nrow(pca_transformed_data)
avatar_weights <- matrix(nrow = n, ncol = k)
for (i in 1:n) {
# Step 1: Inverse of Distances
distances <- sqrt(rowSums((pca_transformed_data[knn_result$nn.index[i, ], ] - pca_transformed_data[i, ])^2))
inverse_distances <- 1 / distances
# Step 2: Random Weights
random_weights <- rexp(k, rate = 1)
# Step 3: Contribution Factors
shuffled_indices <- sample(k)
contribution_factors <- 1 / (2^shuffled_indices)
# Step 4: Calculate Weights
weights <- inverse_distances * random_weights * contribution_factors
# Step 5: Normalize Weights
normalized_weights <- weights / sum(weights)
avatar_weights[i, ] <- normalized_weights
}
return(avatar_weights)
}
# Generate avatar weights
set.seed(12)
avatar_weights <- generate_avatar_weights(knn_result, pca_transformed_data, k)
Generation of avatar in the latent space
# Assuming pca_result, avatar_weights, knn_result$nn.index, and pca_transformed_data are already defined
# Function to generate avatars in PCA space based on weights
generate_avatars_pca_space <- function(pca_transformed_data, knn_indices, weights) {
n <- nrow(pca_transformed_data)
avatars_pca <- matrix(nrow = n, ncol = ncol(pca_transformed_data))
for (i in 1:n) {
weighted_avatars <- pca_transformed_data[knn_indices[i, ], ] * weights[i, ]
avatars_pca[i, ] <- colSums(weighted_avatars)
}
return(avatars_pca)
}
# Generate avatars in PCA space
avatars_pca_space <- generate_avatars_pca_space(pca_transformed_data, knn_result$nn.index, avatar_weights)
Return to the initial scale
# Assuming 'aids_pca' is the PCA object and 'avatars_pca_space' contains the avatars in PCA space
# Inverse PCA transformation
inverse_pca <- function(pca_object, pca_data) {
return(pca_data %*% t(pca_object$rotation) + matrix(pca_object$center, nrow = nrow(pca_data), ncol = ncol(pca_object$rotation), byrow = TRUE))
}
avatars_original_scale <- inverse_pca(pca, avatars_pca_space)
# Assuming 'aids_data_normalized' contains the scaling attributes of the original data
# Inverse normalization (if the original data was normalized)
avatars_rescaled <- scale(avatars_original_scale, center = FALSE, scale = 1/attr(data_normalized, "scaled:scale"))
avatars_rescaled <- sweep(avatars_rescaled, 2, attr(data_normalized, "scaled:center"), "+")
Transform into tibble
avatars_tibble_knn10 <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
)
avatars_tibble_factor_knn10 <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
) %>%
mutate(haplotype = as.factor(haplotype),
cyp3A5D = as.factor(cyp3A5D),
sexe_r = as.factor(sexe_r),
sexe_d = as.factor(sexe_d),
# CYP3A4_1B = as.factor(CYP3A4_1B),
# MDR1_C1236T = as.factor(MDR1_C1236T),
# MDR1_G2677T = as.factor(MDR1_G2677T),
# MDR1_C3435T = as.factor(MDR1_C3435T),
rejet_aigu = as.factor(rejet_aigu))
Plot of the synthetic and original in the latent space
# Combine original and synthetic data for visualization
combined_data <- rbind(
original1 %>% mutate(DataType = 'Original'),
avatars_tibble_knn10 %>% mutate(DataType = 'Synthetic')
)
# Perform PCA on combined data
combined_data_normalized <- scale(combined_data[, -which(names(combined_data) %in% c("DataType", "id"))])
combined_pca <- prcomp(combined_data_normalized, scale. = FALSE)
# Extract the first two principal components
combined_pca_data <- data.frame(combined_pca$x[, 1:2])
combined_pca_data$DataType <- combined_data$DataType
# Plot PCA with color differentiation
ggplot(combined_pca_data, aes(x = PC1, y = PC2, color = DataType)) +
geom_point(alpha = 0.8) +
theme_minimal() +
labs(title = "PCA Plot", x = "Principal Component 1", y = "Principal Component 2", color = "Data Type")
## Vector of categorical variables that need transformation
catVars <- c("haplotype", "cyp3A5D", "sexe_r", "sexe_d",
"rejet_aigu", "event")
## Create a variable list.
vars <- c( "haplotype", "cyp3A5D", "age_r", "sexe_r", "age_d", "sexe_d",
"rejet_aigu", "TIF", "event", "delai_event", "DataType")
tableOne <- CreateTableOne(vars = vars, strata = "DataType",factorVars = catVars, data = combined_data)
tableOne2<-print(tableOne, nonnormal = c( "age_r", "age_d", "TIF", "delai_event"), printToggle=F, minMax=T)
| Original | Synthetic | p | test | |
|---|---|---|---|---|
| n | 253 | 253 | ||
| haplotype (%) | 0.008 | |||
| 1 | 97 (38.3) | 93 ( 36.8) | ||
| 2 | 123 (48.6) | 146 ( 57.7) | ||
| 3 | 33 (13.0) | 14 ( 5.5) | ||
| cyp3A5D = 2 (%) | 211 (83.4) | 219 ( 86.6) | 0.384 | |
| age_r (median [range]) | 55.00 [19.00, 78.00] | 54.21 [24.23, 73.48] | 0.750 | nonnorm |
| sexe_r = 2 (%) | 156 (61.7) | 165 ( 65.2) | 0.460 | |
| age_d (median [range]) | 40.00 [12.00, 73.00] | 39.60 [15.46, 62.80] | 0.866 | nonnorm |
| sexe_d = 2 (%) | 174 (68.8) | 188 ( 74.3) | 0.200 | |
| rejet_aigu = 2 (%) | 81 (32.0) | 72 ( 28.5) | 0.439 | |
| TIF (median [range]) | 1153.00 [303.00, 2580.00] | 1156.78 [570.38, 1987.46] | 0.785 | nonnorm |
| event = 1 (%) | 22 ( 8.7) | 20 ( 7.9) | 0.872 | |
| delai_event (median [range]) | 5.34 [0.68, 15.83] | 5.60 [0.96, 15.33] | 0.382 | nonnorm |
| DataType = Synthetic (%) | 0 ( 0.0) | 253 (100.0) | <0.001 |
summary(original)
## haplotype cyp3A5D age_r sexe_r age_d sexe_d
## autre: 97 Es : 42 Min. :19.00 F: 97 Min. :12.00 F: 79
## het :123 NEs:211 1st Qu.:44.00 M:156 1st Qu.:25.00 M:174
## hom : 33 Median :55.00 Median :40.00
## Mean :53.84 Mean :38.49
## 3rd Qu.:64.00 3rd Qu.:49.00
## Max. :78.00 Max. :73.00
## rejet_aigu TIF event delai_event
## 0:172 Min. : 303 Min. :0.00000 Min. : 0.680
## 1: 81 1st Qu.: 975 1st Qu.:0.00000 1st Qu.: 2.920
## Median :1153 Median :0.00000 Median : 5.340
## Mean :1199 Mean :0.08696 Mean : 6.044
## 3rd Qu.:1368 3rd Qu.:0.00000 3rd Qu.: 8.700
## Max. :2580 Max. :1.00000 Max. :15.830
summary(avatars_tibble_knn10)
## haplotype cyp3A5D age_r sexe_r
## Min. :1.000 Min. :1.000 Min. :24.23 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:46.61 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :54.21 Median :2.000
## Mean :1.688 Mean :1.866 Mean :53.72 Mean :1.652
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:62.31 3rd Qu.:2.000
## Max. :3.000 Max. :2.000 Max. :73.48 Max. :2.000
## age_d sexe_d rejet_aigu TIF
## Min. :15.46 Min. :1.000 Min. :1.000 Min. : 570.4
## 1st Qu.:28.89 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1035.3
## Median :39.60 Median :2.000 Median :1.000 Median :1156.8
## Mean :38.51 Mean :1.743 Mean :1.285 Mean :1167.6
## 3rd Qu.:47.61 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1296.9
## Max. :62.80 Max. :2.000 Max. :2.000 Max. :1987.5
## event delai_event
## Min. :0.00000 Min. : 0.9566
## 1st Qu.:0.00000 1st Qu.: 4.0307
## Median :0.00000 Median : 5.5990
## Mean :0.07905 Mean : 5.9403
## 3rd Qu.:0.00000 3rd Qu.: 7.1670
## Max. :1.00000 Max. :15.3301
# boxplots
plot_boxplot(combined_data , by ="DataType")
# histograms
# Function to create histogram for each continuous variable
plot_histograms <- function(data, var_name, group_var) {
ggplot(data, aes(x = !!sym(var_name), fill = !!sym(group_var))) +
geom_histogram(alpha = 0.5,show.legend = FALSE) +
labs(x = var_name, y = "Count") +
theme_minimal() +
ggtitle(paste(var_name))
}
# Using select_if to identify continuous variables and map to apply the function
plots <- combined_data %>%
select( -sexe_r,-sexe_d) %>%
select_if(is.numeric) %>%
names() %>%
map(~plot_histograms(combined_data, ., "DataType"))
# Optionally, print or arrange plots (e.g., using gridExtra or patchwork packages)
wrap_plots(plots)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##Correlation Analysis
cor_real <- cor(original1, use = "complete.obs")
cor_synthetic <- cor(avatars_tibble_knn10, use = "complete.obs")
# plots
ggcorrplot(cor_real, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# plots
ggcorrplot(cor_synthetic, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.2112071 3.3575352 0.3463273 3.497 0.00047 ***
## cyp3A5D -1.2323909 0.2915946 0.5567303 -2.214 0.02685 *
## age_r -0.0039521 0.9960557 0.0187880 -0.210 0.83339
## sexe_r -0.0438849 0.9570641 0.4606422 -0.095 0.92410
## age_d 0.0360206 1.0366772 0.0203668 1.769 0.07696 .
## sexe_d 0.2786636 1.3213627 0.5181402 0.538 0.59070
## rejet_aigu 1.0124644 2.7523756 0.4804379 2.107 0.03508 *
## TIF -0.0002268 0.9997732 0.0005753 -0.394 0.69345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.3575 0.2978 1.70305 6.6193
## cyp3A5D 0.2916 3.4294 0.09792 0.8683
## age_r 0.9961 1.0040 0.96004 1.0334
## sexe_r 0.9571 1.0449 0.38801 2.3607
## age_d 1.0367 0.9646 0.99611 1.0789
## sexe_d 1.3214 0.7568 0.47861 3.6481
## rejet_aigu 2.7524 0.3633 1.07339 7.0576
## TIF 0.9998 1.0002 0.99865 1.0009
##
## Concordance= 0.758 (se = 0.054 )
## Likelihood ratio test= 24.6 on 8 df, p=0.002
## Wald test = 21.09 on 8 df, p=0.007
## Score (logrank) test = 25.84 on 8 df, p=0.001
ggforest(fit_original)
# synthetique
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = avatars_tibble_knn10)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = avatars_tibble_knn10)
##
## n= 253, number of events= 20
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 2.189521 8.930938 0.638419 3.430 0.000604 ***
## cyp3A5D -0.517214 0.596179 0.867578 -0.596 0.551069
## age_r 0.011612 1.011680 0.026141 0.444 0.656889
## sexe_r 0.821917 2.274856 0.637399 1.289 0.197229
## age_d 0.041406 1.042275 0.029631 1.397 0.162302
## sexe_d 2.595350 13.401278 1.198788 2.165 0.030389 *
## rejet_aigu 1.420749 4.140222 0.816441 1.740 0.081828 .
## TIF -0.002234 0.997768 0.001226 -1.822 0.068386 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 8.9309 0.11197 2.5555 31.212
## cyp3A5D 0.5962 1.67735 0.1089 3.265
## age_r 1.0117 0.98845 0.9612 1.065
## sexe_r 2.2749 0.43959 0.6522 7.934
## age_d 1.0423 0.95944 0.9835 1.105
## sexe_d 13.4013 0.07462 1.2786 140.461
## rejet_aigu 4.1402 0.24153 0.8357 20.511
## TIF 0.9978 1.00224 0.9954 1.000
##
## Concordance= 0.851 (se = 0.04 )
## Likelihood ratio test= 41.23 on 8 df, p=2e-06
## Wald test = 22.55 on 8 df, p=0.004
## Score (logrank) test = 42.55 on 8 df, p=1e-06
ggforest(fit_synthetique)
Allow to see which vairable would have been selected
Original
boot.stepAIC(fit_original, original1, B = 100, k=log(nrow(original1)))
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = original1)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 5.53 * df
##
## Covariates selected
## (%)
## haplotype 95
## rejet_aigu 47
## cyp3A5D 29
## age_d 21
## sexe_d 3
## Null 2
## sexe_r 2
## TIF 2
##
## Coefficients Sign
## + (%) - (%)
## age_d 100 0
## haplotype 100 0
## rejet_aigu 100 0
## sexe_d 100 0
## sexe_r 100 0
## cyp3A5D 0 100
## TIF 0 100
##
## Stat Significance
## (%)
## age_d 100
## cyp3A5D 100
## haplotype 100
## rejet_aigu 100
## sexe_d 100
## sexe_r 100
## TIF 100
##
##
## The stepAIC() for the original data-set gave
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype, data = original1)
##
## coef exp(coef) se(coef) z p
## haplotype 1.2035 3.3319 0.3276 3.674 0.000239
##
## Likelihood ratio test=14.01 on 1 df, p=0.0001822
## n= 253, number of events= 22
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r +
## age_d + sexe_d + rejet_aigu + TIF
##
## Final Model:
## Surv(delai_event, event) ~ haplotype
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 14 -24.59675 212.4446
## 2 - sexe_r 1 0.009067423 15 -24.58769 206.9203
## 3 - age_r 1 0.039034149 16 -24.54865 201.4259
## 4 - TIF 1 0.251427799 17 -24.29723 196.1439
## 5 - sexe_d 1 0.442126797 18 -23.85510 191.0527
## 6 - age_d 1 2.811491990 19 -21.04361 188.3308
## 7 - cyp3A5D 1 2.805921958 20 -18.23768 185.6033
## 8 - rejet_aigu 1 4.230950507 21 -14.00673 184.3009
synhtetic knn10
boot.stepAIC(fit_synthetique, avatars_tibble_knn10, B = 100, k=log(nrow(avatars_tibble_knn10)))
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = avatars_tibble_knn10)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 5.53 * df
##
## Covariates selected
## (%)
## haplotype 98
## sexe_d 66
## rejet_aigu 53
## TIF 39
## age_d 24
## sexe_r 18
## age_r 8
## cyp3A5D 5
##
## Coefficients Sign
## + (%) - (%)
## age_d 100 0
## age_r 100 0
## haplotype 100 0
## rejet_aigu 100 0
## sexe_d 100 0
## sexe_r 100 0
## cyp3A5D 20 80
## TIF 0 100
##
## Stat Significance
## (%)
## age_d 100.00
## age_r 100.00
## haplotype 100.00
## sexe_r 100.00
## TIF 100.00
## cyp3A5D 80.00
## rejet_aigu 62.26
## sexe_d 62.12
##
##
## The stepAIC() for the original data-set gave
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + rejet_aigu,
## data = avatars_tibble_knn10)
##
## coef exp(coef) se(coef) z p
## haplotype 1.4166 4.1230 0.3771 3.756 0.000173
## rejet_aigu 1.9030 6.7057 0.7580 2.510 0.012058
##
## Likelihood ratio test=28.87 on 2 df, p=5.394e-07
## n= 253, number of events= 20
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r +
## age_d + sexe_d + rejet_aigu + TIF
##
## Final Model:
## Surv(delai_event, event) ~ haplotype + rejet_aigu
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 12 -41.23295 171.7220
## 2 - age_r 1 0.2005000 13 -41.03245 166.3891
## 3 - cyp3A5D 1 0.4592484 14 -40.57320 161.3150
## 4 - sexe_r 1 1.4438502 15 -39.12935 157.2254
## 5 - age_d 1 2.4135185 16 -36.71583 154.1055
## 6 - TIF 1 3.9163173 17 -32.79951 152.4885
## 7 - sexe_d 1 3.9340176 18 -28.86549 150.8891
fit_original <- coxph(Surv(delai_event, event) ~ haplotype , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype, data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.2035 3.3319 0.3276 3.674 0.000239 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.332 0.3001 1.753 6.332
##
## Concordance= 0.682 (se = 0.044 )
## Likelihood ratio test= 14.01 on 1 df, p=2e-04
## Wald test = 13.5 on 1 df, p=2e-04
## Score (logrank) test = 14.91 on 1 df, p=1e-04
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype , data = avatars_tibble_knn10)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype, data = avatars_tibble_knn10)
##
## n= 253, number of events= 20
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.7378 5.6846 0.3963 4.385 1.16e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 5.685 0.1759 2.614 12.36
##
## Concordance= 0.721 (se = 0.035 )
## Likelihood ratio test= 19.27 on 1 df, p=1e-05
## Wald test = 19.23 on 1 df, p=1e-05
## Score (logrank) test = 19.86 on 1 df, p=8e-06
Allow to define the variability range of HR for a given dataset (intra dataset variability)
# Define the Cox model
cox_model <- function(data, indices) {
d <- data[indices,] # allows bootstrapping to sample the data
fit <- coxph(Surv(delai_event, event) ~ haplotype , data=d)
return(fit$coefficients)
}
# Set the seed for reproducibility
set.seed(12)
# Bootstrap the Cox model
boot_results <- boot(data=avatars_tibble_knn10, statistic=cox_model, R=100)
# Convert bootstrap results to a data frame for ggplot2
boot_hrs <- exp(boot_results$t) # Convert log(HR) to HR
hr_data <- data.frame(HR=boot_hrs[,1])
# Calculate summary statistics
summary_stats <- quantile(hr_data$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1))
names(summary_stats) <- c("Min","2.5th", "5th", "25th", "Median", "75th", "95th","97.5th","Max")
# Create the histogram
ggplot(hr_data, aes(x=HR)) +
geom_histogram(bins=30, fill="#007a86", color="black") +
# geom_vline(aes(xintercept=summary_stats["Min"]), color="red", linetype="dashed") +
geom_vline(aes(xintercept=summary_stats["25th"]), color="gray", linetype="dashed", linewidth=2) +
geom_vline(aes(xintercept=summary_stats["Median"]), color="blue", linetype="dashed", linewidth=2) +
geom_vline(aes(xintercept=summary_stats["75th"]), color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Max"]), color="purple", linetype="dashed") +
labs(title="Bootstrap Distribution of Hazard Ratios", x="Hazard Ratio (HR)", y="Frequency") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
# Print summary statistics
print(summary_stats)
## Min 2.5th 5th 25th Median 75th 95th 97.5th
## 2.612300 3.397346 3.834001 4.983690 5.923145 7.502672 14.680292 16.598891
## Max
## 23.260106
km_original <- survfit(Surv(delai_event, event) ~ haplotype, data = original)
km_original_plot <- ggsurvplot(
km_original,
data = original,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_original_plot
km_synthetique <- survfit(Surv(delai_event, event) ~ haplotype, data = avatars_tibble_knn10)
km_synthetique_avatar_10 <- ggsurvplot(
km_synthetique,
data = avatars_tibble_knn10,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique_avatar_10
Plots original & synthetic combined
## combine data
combined_df <- rbind(original %>% mutate(group = "original"), avatars_tibble_factor_knn10 %>% mutate(group = "synthetic")) %>% mutate(combined_haplotype = str_c(haplotype,"_", group ))
## fit the model
km_combined <- survfit(Surv(delai_event, event) ~ combined_haplotype, data = combined_df)
# plot
ggsurvplot(fit = km_combined,
data = combined_df,
size = 1, # change line size
conf.int = FALSE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.35, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
library(GGally)
pm_knn10 <- combined_df %>% select(haplotype:delai_event, group) %>% ggpairs(
ggplot2::aes(colour = group,alpha = 0.5),
upper = list(continuous = wrap("cor", size = 1.5)),
lower=list(combo=wrap("facethist", binwidth=0.5))) +
theme(strip.text.x = element_text(size = 5),
strip.text.y = element_text(size = 5),axis.text = element_text(size = 5))
pm_knn10
# ggsave("comparaison_distribution_knn10.pdf")
Allow to define the variability range of HR for different Avatar generated with different seed but the same knn (inter dataset variability)
# Assuming all your existing functions and necessary libraries are loaded
run_model_with_seed <- function(seed_value) {
# Number of neighbors
k <- 10 # Adjust this based on your requirement
pca_transformed_data <- pca$x
knn_result <- get.knn(pca_transformed_data, k)
generate_avatar_weights <- function(knn_result, pca_transformed_data, k) {
n <- nrow(pca_transformed_data)
avatar_weights <- matrix(nrow = n, ncol = k)
for (i in 1:n) {
# Step 1: Inverse of Distances
distances <- sqrt(rowSums((pca_transformed_data[knn_result$nn.index[i, ], ] - pca_transformed_data[i, ])^2))
inverse_distances <- 1 / distances
# Step 2: Random Weights
random_weights <- rexp(k, rate = 1)
# Step 3: Contribution Factors
shuffled_indices <- sample(k)
contribution_factors <- 1 / (2^shuffled_indices)
# Step 4: Calculate Weights
weights <- inverse_distances * random_weights * contribution_factors
# Step 5: Normalize Weights
normalized_weights <- weights / sum(weights)
avatar_weights[i, ] <- normalized_weights
}
return(avatar_weights)
}
# Generate avatar weights
set.seed(seed_value)
avatar_weights <- generate_avatar_weights(knn_result, pca_transformed_data, k)
# Function to generate avatars in PCA space based on weights
generate_avatars_pca_space <- function(pca_transformed_data, knn_indices, weights) {
n <- nrow(pca_transformed_data)
avatars_pca <- matrix(nrow = n, ncol = ncol(pca_transformed_data))
for (i in 1:n) {
weighted_avatars <- pca_transformed_data[knn_indices[i, ], ] * weights[i, ]
avatars_pca[i, ] <- colSums(weighted_avatars)
}
return(avatars_pca)
}
# Generate avatars in PCA space
avatars_pca_space <- generate_avatars_pca_space(pca_transformed_data, knn_result$nn.index, avatar_weights)
# Inverse PCA transformation
inverse_pca <- function(pca_object, pca_data) {
return(pca_data %*% t(pca_object$rotation) + matrix(pca_object$center, nrow = nrow(pca_data), ncol = ncol(pca_object$rotation), byrow = TRUE))
}
avatars_original_scale <- inverse_pca(pca, avatars_pca_space)
# Inverse normalization (if the original data was normalized)
avatars_rescaled <- scale(avatars_original_scale, center = FALSE, scale = 1/attr(data_normalized, "scaled:scale"))
avatars_rescaled <- sweep(avatars_rescaled, 2, attr(data_normalized, "scaled:center"), "+")
avatars_tibble_knn10 <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
)
avatars_tibble_factor_knn10 <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
) %>%
mutate(haplotype = as.factor(haplotype),
cyp3A5D = as.factor(cyp3A5D),
sexe_r = as.factor(sexe_r),
sexe_d = as.factor(sexe_d),
# CYP3A4_1B = as.factor(CYP3A4_1B),
# MDR1_C1236T = as.factor(MDR1_C1236T),
# MDR1_G2677T = as.factor(MDR1_G2677T),
# MDR1_C3435T = as.factor(MDR1_C3435T),
rejet_aigu = as.factor(rejet_aigu))
# Finally, fit the Cox model
fit <- coxph(Surv(delai_event, event) ~ haplotype ,
data = avatars_tibble_knn10)
# Calculate confidence intervals
ci <- confint(fit)
return(list(fit = fit, ci = ci))
}
extract_hrs_and_cis <- function(model_output) {
coefs <- model_output$fit$coefficients
ci <- model_output$ci
hr <- exp(coefs)
ci_lower <- exp(ci[,"2.5 %"])
ci_upper <- exp(ci[,"97.5 %"])
return(data.frame(variable = names(hr), hr = hr, ci_lower = ci_lower, ci_upper = ci_upper))
}
# Generate a list of seed values
seed_values <- sample(x=100) # Modify this if you need different seed values
# Apply the algorithm with different seed values
model_results <- map(seed_values, run_model_with_seed)
# Extract HR and CI from model results
extracted_results <- map(model_results, extract_hrs_and_cis)
# Combine results into a single data frame
combined_results <- bind_rows(extracted_results)
# Calculate median HR and CI for each variable
# aggregate_metrics <- combined_results %>%
# group_by(variable) %>%
# summarize(
# median_hr = median(hr),
# median_ci_lower = median(ci_lower),
# median_ci_upper = median(ci_upper)
# )
#
# aggregate_metrics
# Calculate the specified percentiles for HRs for each variable
percentile_metrics <- combined_results %>%
group_by(variable) %>%
summarize(
percentile_0 = quantile(hr, probs = 0),
percentile_5 = quantile(hr, probs = 0.05),
percentile_25 = quantile(hr, probs = 0.25),
percentile_50 = quantile(hr, probs = 0.5),
percentile_75 = quantile(hr, probs = 0.75),
percentile_95 = quantile(hr, probs = 0.95),
percentile_100 = quantile(hr, probs = 1)
) %>%
pivot_longer(-variable, names_to = "Percentile_HR", values_to = "Value_HR") %>%
mutate(Value_HR = round(Value_HR, 2))
# percentile_metrics
# datatable(percentile_metrics)
knitr::kable(percentile_metrics %>% mutate(Value_HR = round(Value_HR, 2)), "simple")
| variable | Percentile_HR | Value_HR |
|---|---|---|
| haplotype | percentile_0 | 2.40 |
| haplotype | percentile_5 | 3.98 |
| haplotype | percentile_25 | 5.41 |
| haplotype | percentile_50 | 6.43 |
| haplotype | percentile_75 | 8.20 |
| haplotype | percentile_95 | 12.84 |
| haplotype | percentile_100 | 21.61 |
We investigate the effect of data augmentaiotn with a defined seed and knn=10
data_augment_avatar <- function(x) {
data_normalized <- scale(original1)
pca <- prcomp(data_normalized, scale. = FALSE)# pour selecitonner le nombre de cp rank. = 3
# Number of neighbors
k <- 10 # Adjust this based on your requirement
pca_transformed_data <- pca$x
knn_result <- get.knn(pca_transformed_data, k)
generate_avatar_weights <- function(knn_result, pca_transformed_data, k) {
n <- nrow(pca_transformed_data)
avatar_weights <- matrix(nrow = n, ncol = k)
for (i in 1:n) {
# Step 1: Inverse of Distances
distances <- sqrt(rowSums((pca_transformed_data[knn_result$nn.index[i, ], ] - pca_transformed_data[i, ])^2))
inverse_distances <- 1 / distances
# Step 2: Random Weights
random_weights <- rexp(k, rate = 1)
# Step 3: Contribution Factors
shuffled_indices <- sample(k)
contribution_factors <- 1 / (2^shuffled_indices)
# Step 4: Calculate Weights
weights <- inverse_distances * random_weights * contribution_factors
# Step 5: Normalize Weights
normalized_weights <- weights / sum(weights)
avatar_weights[i, ] <- normalized_weights
}
return(avatar_weights)
}
# Generate avatar weights
set.seed( str_c(1,x))
avatar_weights <- generate_avatar_weights(knn_result, pca_transformed_data, k)
# Assuming pca_result, avatar_weights, knn_result$nn.index, and pca_transformed_data are already defined
# Function to generate avatars in PCA space based on weights
generate_avatars_pca_space <- function(pca_transformed_data, knn_indices, weights) {
n <- nrow(pca_transformed_data)
avatars_pca <- matrix(nrow = n, ncol = ncol(pca_transformed_data))
for (i in 1:n) {
weighted_avatars <- pca_transformed_data[knn_indices[i, ], ] * weights[i, ]
avatars_pca[i, ] <- colSums(weighted_avatars)
}
return(avatars_pca)
}
# Generate avatars in PCA space
avatars_pca_space <- generate_avatars_pca_space(pca_transformed_data, knn_result$nn.index, avatar_weights)
# Assuming 'aids_pca' is the PCA object and 'avatars_pca_space' contains the avatars in PCA space
# Inverse PCA transformation
inverse_pca <- function(pca_object, pca_data) {
return(pca_data %*% t(pca_object$rotation) + matrix(pca_object$center, nrow = nrow(pca_data), ncol = ncol(pca_object$rotation), byrow = TRUE))
}
avatars_original_scale <- inverse_pca(pca, avatars_pca_space)
# Assuming 'aids_data_normalized' contains the scaling attributes of the original data
# Inverse normalization (if the original data was normalized)
avatars_rescaled <- scale(avatars_original_scale, center = FALSE, scale = 1/attr(data_normalized, "scaled:scale"))
avatars_rescaled <- sweep(avatars_rescaled, 2, attr(data_normalized, "scaled:center"), "+")
avatars_tibble <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
)
}
iteration <- c(1:4)
augmented_data_10 <- map_dfr(iteration, data_augment_avatar, .id = "iter_")
augmented_data_10 <- augmented_data_10 %>%
select(-iter_)
augmented_data_10_factor_knn10 <- augmented_data_10 %>%
mutate(haplotype = as.factor(haplotype),
cyp3A5D = as.factor(cyp3A5D),
sexe_r = as.factor(sexe_r),
sexe_d = as.factor(sexe_d),
# CYP3A4_1B = as.factor(CYP3A4_1B),
# MDR1_C1236T = as.factor(MDR1_C1236T),
# MDR1_G2677T = as.factor(MDR1_G2677T),
# MDR1_C3435T = as.factor(MDR1_C3435T),
rejet_aigu = as.factor(rejet_aigu))
Plot of the synthetic and original in the latent space
# Combine original and synthetic data for visualization
combined_data <- rbind(
original1 %>% mutate(DataType = 'Original'),
augmented_data_10 %>% mutate(DataType = 'Synthetic')
)
# Perform PCA on combined data
combined_data_normalized <- scale(combined_data[, -which(names(combined_data) %in% c("DataType", "id"))])
combined_pca <- prcomp(combined_data_normalized, scale. = FALSE)
# Extract the first two principal components
combined_pca_data <- data.frame(combined_pca$x[, 1:2])
combined_pca_data$DataType <- combined_data$DataType
# Plot PCA with color differentiation
ggplot(combined_pca_data, aes(x = PC1, y = PC2, color = DataType)) +
geom_point(alpha = 0.5) +
theme_minimal() +
labs(title = "PCA Plot", x = "Principal Component 1", y = "Principal Component 2", color = "Data Type")
Export data augmented knn = 10
write_csv(augmented_data_10, file = "avatar_sfpt_knn10_data_augmented.csv")
## Vector of categorical variables that need transformation
catVars <- c("haplotype", "cyp3A5D", "sexe_r", "sexe_d",
"rejet_aigu", "event")
## Create a variable list.
vars <- c( "haplotype", "cyp3A5D", "age_r", "sexe_r", "age_d", "sexe_d",
"rejet_aigu", "TIF", "event", "delai_event", "DataType")
tableOne <- CreateTableOne(vars = vars, strata = "DataType",factorVars = catVars, data = combined_data)
tableOne2<-print(tableOne, nonnormal = c( "age_r", "age_d", "TIF", "delai_event"), printToggle=F, minMax=T)
| Original | Synthetic | p | test | |
|---|---|---|---|---|
| n | 253 | 1012 | ||
| haplotype (%) | <0.001 | |||
| 1 | 97 (38.3) | 356 ( 35.2) | ||
| 2 | 123 (48.6) | 602 ( 59.5) | ||
| 3 | 33 (13.0) | 54 ( 5.3) | ||
| cyp3A5D = 2 (%) | 211 (83.4) | 880 ( 87.0) | 0.172 | |
| age_r (median [range]) | 55.00 [19.00, 78.00] | 54.41 [23.53, 75.26] | 0.565 | nonnorm |
| sexe_r = 2 (%) | 156 (61.7) | 666 ( 65.8) | 0.244 | |
| age_d (median [range]) | 40.00 [12.00, 73.00] | 39.92 [15.46, 63.56] | 0.683 | nonnorm |
| sexe_d = 2 (%) | 174 (68.8) | 743 ( 73.4) | 0.161 | |
| rejet_aigu = 2 (%) | 81 (32.0) | 276 ( 27.3) | 0.155 | |
| TIF (median [range]) | 1153.00 [303.00, 2580.00] | 1141.45 [372.32, 2040.25] | 0.473 | nonnorm |
| event = 1 (%) | 22 ( 8.7) | 80 ( 7.9) | 0.776 | |
| delai_event (median [range]) | 5.34 [0.68, 15.83] | 5.31 [0.87, 15.33] | 0.756 | nonnorm |
| DataType = Synthetic (%) | 0 ( 0.0) | 1012 (100.0) | <0.001 |
summary(original)
## haplotype cyp3A5D age_r sexe_r age_d sexe_d
## autre: 97 Es : 42 Min. :19.00 F: 97 Min. :12.00 F: 79
## het :123 NEs:211 1st Qu.:44.00 M:156 1st Qu.:25.00 M:174
## hom : 33 Median :55.00 Median :40.00
## Mean :53.84 Mean :38.49
## 3rd Qu.:64.00 3rd Qu.:49.00
## Max. :78.00 Max. :73.00
## rejet_aigu TIF event delai_event
## 0:172 Min. : 303 Min. :0.00000 Min. : 0.680
## 1: 81 1st Qu.: 975 1st Qu.:0.00000 1st Qu.: 2.920
## Median :1153 Median :0.00000 Median : 5.340
## Mean :1199 Mean :0.08696 Mean : 6.044
## 3rd Qu.:1368 3rd Qu.:0.00000 3rd Qu.: 8.700
## Max. :2580 Max. :1.00000 Max. :15.830
summary(augmented_data_10)
## haplotype cyp3A5D age_r sexe_r age_d
## Min. :1.000 Min. :1.00 Min. :23.53 Min. :1.000 Min. :15.46
## 1st Qu.:1.000 1st Qu.:2.00 1st Qu.:45.97 1st Qu.:1.000 1st Qu.:29.24
## Median :2.000 Median :2.00 Median :54.41 Median :2.000 Median :39.92
## Mean :1.702 Mean :1.87 Mean :53.53 Mean :1.658 Mean :38.81
## 3rd Qu.:2.000 3rd Qu.:2.00 3rd Qu.:62.10 3rd Qu.:2.000 3rd Qu.:47.25
## Max. :3.000 Max. :2.00 Max. :75.26 Max. :2.000 Max. :63.56
## sexe_d rejet_aigu TIF event
## Min. :1.000 Min. :1.000 Min. : 372.3 Min. :0.00000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1029.4 1st Qu.:0.00000
## Median :2.000 Median :1.000 Median :1141.4 Median :0.00000
## Mean :1.734 Mean :1.273 Mean :1163.0 Mean :0.07905
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1279.4 3rd Qu.:0.00000
## Max. :2.000 Max. :2.000 Max. :2040.3 Max. :1.00000
## delai_event
## Min. : 0.8704
## 1st Qu.: 3.8048
## Median : 5.3123
## Mean : 5.7545
## 3rd Qu.: 7.1851
## Max. :15.3301
# boxplots
plot_boxplot(combined_data , by ="DataType")
# histograms
# Function to create histogram for each continuous variable
plot_histograms <- function(data, var_name, group_var) {
ggplot(data, aes(x = !!sym(var_name), fill = !!sym(group_var))) +
geom_histogram(alpha = 0.5,show.legend = FALSE) +
labs(x = var_name, y = "Count") +
theme_minimal() +
ggtitle(paste(var_name))
}
# Using select_if to identify continuous variables and map to apply the function
plots <- combined_data %>%
select( -sexe_r,-sexe_d) %>%
select_if(is.numeric) %>%
names() %>%
map(~plot_histograms(combined_data, ., "DataType"))
# Optionally, print or arrange plots (e.g., using gridExtra or patchwork packages)
wrap_plots(plots)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##Correlation Analysis
cor_real <- cor(original1, use = "complete.obs")
cor_synthetic <- cor(augmented_data_10, use = "complete.obs")
# plots
ggcorrplot(cor_real, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# plots
ggcorrplot(cor_synthetic, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.2112071 3.3575352 0.3463273 3.497 0.00047 ***
## cyp3A5D -1.2323909 0.2915946 0.5567303 -2.214 0.02685 *
## age_r -0.0039521 0.9960557 0.0187880 -0.210 0.83339
## sexe_r -0.0438849 0.9570641 0.4606422 -0.095 0.92410
## age_d 0.0360206 1.0366772 0.0203668 1.769 0.07696 .
## sexe_d 0.2786636 1.3213627 0.5181402 0.538 0.59070
## rejet_aigu 1.0124644 2.7523756 0.4804379 2.107 0.03508 *
## TIF -0.0002268 0.9997732 0.0005753 -0.394 0.69345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.3575 0.2978 1.70305 6.6193
## cyp3A5D 0.2916 3.4294 0.09792 0.8683
## age_r 0.9961 1.0040 0.96004 1.0334
## sexe_r 0.9571 1.0449 0.38801 2.3607
## age_d 1.0367 0.9646 0.99611 1.0789
## sexe_d 1.3214 0.7568 0.47861 3.6481
## rejet_aigu 2.7524 0.3633 1.07339 7.0576
## TIF 0.9998 1.0002 0.99865 1.0009
##
## Concordance= 0.758 (se = 0.054 )
## Likelihood ratio test= 24.6 on 8 df, p=0.002
## Wald test = 21.09 on 8 df, p=0.007
## Score (logrank) test = 25.84 on 8 df, p=0.001
ggforest(fit_original)
# synthetique
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = augmented_data_10)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = augmented_data_10)
##
## n= 1012, number of events= 80
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.5000227 4.4817906 0.2358547 6.360 2.02e-10 ***
## cyp3A5D -0.4037335 0.6678220 0.4804159 -0.840 0.400693
## age_r 0.0101762 1.0102281 0.0124915 0.815 0.415273
## sexe_r 0.2129631 1.2373389 0.2734454 0.779 0.436089
## age_d 0.0593288 1.0611240 0.0148048 4.007 6.14e-05 ***
## sexe_d 1.2160189 3.3737299 0.3497060 3.477 0.000507 ***
## rejet_aigu 0.8153153 2.2598881 0.2965094 2.750 0.005965 **
## TIF -0.0002206 0.9997794 0.0005277 -0.418 0.675868
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 4.4818 0.2231 2.8229 7.116
## cyp3A5D 0.6678 1.4974 0.2605 1.712
## age_r 1.0102 0.9899 0.9858 1.035
## sexe_r 1.2373 0.8082 0.7240 2.115
## age_d 1.0611 0.9424 1.0308 1.092
## sexe_d 3.3737 0.2964 1.7000 6.695
## rejet_aigu 2.2599 0.4425 1.2639 4.041
## TIF 0.9998 1.0002 0.9987 1.001
##
## Concordance= 0.784 (se = 0.027 )
## Likelihood ratio test= 112.6 on 8 df, p=<2e-16
## Wald test = 98.87 on 8 df, p=<2e-16
## Score (logrank) test = 124.7 on 8 df, p=<2e-16
ggforest(fit_synthetique)
BootstepAIC augmented synthetic knn10
boot.stepAIC(fit_synthetique, augmented_data_10, B = 100, k=log(nrow(augmented_data_10)))
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = augmented_data_10)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 6.92 * df
##
## Covariates selected
## (%)
## haplotype 100
## age_d 95
## sexe_d 91
## rejet_aigu 53
## age_r 2
## cyp3A5D 1
## sexe_r 1
## TIF 1
##
## Coefficients Sign
## + (%) - (%)
## age_d 100 0
## age_r 100 0
## haplotype 100 0
## rejet_aigu 100 0
## sexe_d 100 0
## sexe_r 100 0
## cyp3A5D 0 100
## TIF 0 100
##
## Stat Significance
## (%)
## age_d 100
## age_r 100
## cyp3A5D 100
## haplotype 100
## rejet_aigu 100
## sexe_d 100
## sexe_r 100
## TIF 100
##
##
## The stepAIC() for the original data-set gave
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + age_d +
## sexe_d + rejet_aigu, data = augmented_data_10)
##
## coef exp(coef) se(coef) z p
## haplotype 1.48573 4.41817 0.21748 6.832 8.40e-12
## age_d 0.06233 1.06431 0.01411 4.418 9.97e-06
## sexe_d 1.12534 3.08126 0.33887 3.321 0.000897
## rejet_aigu 0.75888 2.13589 0.28913 2.625 0.008673
##
## Likelihood ratio test=111.1 on 4 df, p=< 2.2e-16
## n= 1012, number of events= 80
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r +
## age_d + sexe_d + rejet_aigu + TIF
##
## Final Model:
## Surv(delai_event, event) ~ haplotype + age_d + sexe_d + rejet_aigu
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 72 -112.6018 850.2685
## 2 - TIF 1 0.1770448 73 -112.4248 843.5258
## 3 - cyp3A5D 1 0.5888032 74 -111.8360 837.1949
## 4 - age_r 1 0.5216025 75 -111.3144 830.7969
## 5 - sexe_r 1 0.2104665 76 -111.1039 824.0876
Final model original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + rejet_aigu , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + rejet_aigu,
## data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.1681 3.2160 0.3261 3.582 0.000341 ***
## rejet_aigu 0.9238 2.5188 0.4661 1.982 0.047482 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.216 0.3109 1.697 6.094
## rejet_aigu 2.519 0.3970 1.010 6.280
##
## Concordance= 0.732 (se = 0.05 )
## Likelihood ratio test= 18.24 on 2 df, p=1e-04
## Wald test = 17.44 on 2 df, p=2e-04
## Score (logrank) test = 19.29 on 2 df, p=6e-05
Final model synthetic
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_d + rejet_aigu, data = augmented_data_10)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_d + rejet_aigu, data = augmented_data_10)
##
## n= 1012, number of events= 80
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.40489 4.07506 0.21436 6.554 5.61e-11 ***
## cyp3A5D -0.23650 0.78939 0.47551 -0.497 0.618938
## age_d 0.04935 1.05058 0.01412 3.494 0.000475 ***
## rejet_aigu 1.04938 2.85587 0.27963 3.753 0.000175 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 4.0751 0.2454 2.6771 6.203
## cyp3A5D 0.7894 1.2668 0.3108 2.005
## age_d 1.0506 0.9519 1.0219 1.080
## rejet_aigu 2.8559 0.3502 1.6509 4.940
##
## Concordance= 0.777 (se = 0.025 )
## Likelihood ratio test= 97.69 on 4 df, p=<2e-16
## Wald test = 87.65 on 4 df, p=<2e-16
## Score (logrank) test = 98.98 on 4 df, p=<2e-16
km_original <- survfit(Surv(delai_event, event) ~ haplotype, data = original)
ggsurvplot(
km_original,
data = original,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique <- survfit(Surv(delai_event, event) ~ haplotype, data = augmented_data_10_factor_knn10)
km_synthetique_avatar_10_augmented <- ggsurvplot(
km_synthetique,
data = augmented_data_10_factor_knn10,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique_avatar_10_augmented
Plots original & synthetic combined
## combine data
combined_df <- rbind(original %>% mutate(group = "original"), augmented_data_10_factor_knn10 %>% mutate(group = "synthetic")) %>% mutate(combined_haplotype = str_c(haplotype,"_", group ))
## fit the model
km_combined <- survfit(Surv(delai_event, event) ~ combined_haplotype, data = combined_df)
# plot
ggsurvplot(fit = km_combined,
data = combined_df,
size = 1, # change line size
conf.int = FALSE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.35, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
Graphical exploraiotn of distribution
library(GGally)
pm_knn10_augmented <- combined_df %>% select(haplotype:delai_event, group) %>% ggpairs(
ggplot2::aes(colour = group,alpha = 0.5),
upper = list(continuous = wrap("cor", size = 1.5)),
lower=list(combo=wrap("facethist", binwidth=0.5))) +
theme(strip.text.x = element_text(size = 5),
strip.text.y = element_text(size = 5),axis.text = element_text(size = 5))
pm_knn10_augmented
# ggsave("comparaison_distribution_augmented_knn10.pdf")
Allow to define the variability range of HR for a given dataset (intra dataset variability)
# Define the Cox model
cox_model <- function(data, indices) {
d <- data[indices,] # allows bootstrapping to sample the data
fit <- coxph(Surv(delai_event, event) ~ haplotype + age_d + sexe_d + rejet_aigu , data=d)
return(fit$coefficients)
}
# Set the seed for reproducibility
set.seed(12)
# Bootstrap the Cox model
boot_results <- boot(data=augmented_data_10, statistic=cox_model, R=100)
# Convert bootstrap results to a data frame for ggplot2
boot_hrs <- exp(boot_results$t) # Convert log(HR) to HR
hr_data_haplo <- data.frame(HR=boot_hrs[,1])
hr_data_age_d <- data.frame(HR=boot_hrs[,2])
hr_data_cyp3A5D <- data.frame(HR=boot_hrs[,3])
hr_data_rejet_aigu <- data.frame(HR=boot_hrs[,4])
# Calculate summary statistics
summary_stats <- quantile(hr_data_haplo$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) %>%
bind_rows(quantile(hr_data_age_d$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) ) %>%
bind_rows(quantile(hr_data_cyp3A5D$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) ) %>%
bind_rows(quantile(hr_data_rejet_aigu$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) )
names(summary_stats) <- c("Min","2.5th", "5th", "25th", "Median", "75th", "95th","97.5th","Max")
# Create the histogram
ggplot(hr_data_haplo, aes(x=HR)) +
geom_histogram(bins=30, fill="#007a86", color="black") +
# # geom_vline(aes(xintercept=summary_stats["Min"]), color="red", linetype="dashed") +
# geom_vline(aes(xintercept=summary_stats["25th"][[1]][[1]]), color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Median"][[1]][[1]]), color="blue", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["75th"])[[1]][[1]], color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Max"]), color="purple", linetype="dashed") +
labs(title="Bootstrap Distribution of Hazard Ratios", x="Hazard Ratio (HR)", y="Frequency") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
# Print summary statistics
knitr::kable(summary_stats, "simple")
| Min | 2.5th | 5th | 25th | Median | 75th | 95th | 97.5th | Max |
|---|---|---|---|---|---|---|---|---|
| 3.022779 | 3.315683 | 3.576255 | 4.051954 | 4.604895 | 5.091066 | 6.488926 | 7.449832 | 8.648328 |
| 1.029248 | 1.039455 | 1.043175 | 1.053725 | 1.064096 | 1.073754 | 1.085664 | 1.090181 | 1.099419 |
| 1.550175 | 1.790092 | 1.846553 | 2.601247 | 3.116408 | 4.110787 | 5.926792 | 6.620560 | 10.414952 |
| 1.190996 | 1.293889 | 1.349926 | 1.724742 | 2.148875 | 2.651489 | 3.993213 | 4.209455 | 5.695346 |
Allow to define the inter variability range of HR for augmented knn=10 (inter dataset variability) by using 100 bootraps
# Assuming all your existing functions and necessary libraries are loaded
run_model_with_seed <- function(seed_value) {
# augmentaiotn of data
data_augment_avatar <- function(x) {
data_normalized <- scale(original1)
pca <- prcomp(data_normalized, scale. = FALSE)# pour selecitonner le nombre de cp rank. = 3
# Number of neighbors
k <- 10 # Adjust this based on your requirement
pca_transformed_data <- pca$x
knn_result <- get.knn(pca_transformed_data, k)
generate_avatar_weights <- function(knn_result, pca_transformed_data, k) {
n <- nrow(pca_transformed_data)
avatar_weights <- matrix(nrow = n, ncol = k)
for (i in 1:n) {
# Step 1: Inverse of Distances
distances <- sqrt(rowSums((pca_transformed_data[knn_result$nn.index[i, ], ] - pca_transformed_data[i, ])^2))
inverse_distances <- 1 / distances
# Step 2: Random Weights
random_weights <- rexp(k, rate = 1)
# Step 3: Contribution Factors
shuffled_indices <- sample(k)
contribution_factors <- 1 / (2^shuffled_indices)
# Step 4: Calculate Weights
weights <- inverse_distances * random_weights * contribution_factors
# Step 5: Normalize Weights
normalized_weights <- weights / sum(weights)
avatar_weights[i, ] <- normalized_weights
}
return(avatar_weights)
}
# Generate avatar weights
avatar_weights <- generate_avatar_weights(knn_result, pca_transformed_data, k)
# Assuming pca_result, avatar_weights, knn_result$nn.index, and pca_transformed_data are already defined
# Function to generate avatars in PCA space based on weights
generate_avatars_pca_space <- function(pca_transformed_data, knn_indices, weights) {
n <- nrow(pca_transformed_data)
avatars_pca <- matrix(nrow = n, ncol = ncol(pca_transformed_data))
for (i in 1:n) {
weighted_avatars <- pca_transformed_data[knn_indices[i, ], ] * weights[i, ]
avatars_pca[i, ] <- colSums(weighted_avatars)
}
return(avatars_pca)
}
# Generate avatars in PCA space
avatars_pca_space <- generate_avatars_pca_space(pca_transformed_data, knn_result$nn.index, avatar_weights)
# Assuming 'aids_pca' is the PCA object and 'avatars_pca_space' contains the avatars in PCA space
# Inverse PCA transformation
inverse_pca <- function(pca_object, pca_data) {
return(pca_data %*% t(pca_object$rotation) + matrix(pca_object$center, nrow = nrow(pca_data), ncol = ncol(pca_object$rotation), byrow = TRUE))
}
avatars_original_scale <- inverse_pca(pca, avatars_pca_space)
# Assuming 'aids_data_normalized' contains the scaling attributes of the original data
# Inverse normalization (if the original data was normalized)
avatars_rescaled <- scale(avatars_original_scale, center = FALSE, scale = 1/attr(data_normalized, "scaled:scale"))
avatars_rescaled <- sweep(avatars_rescaled, 2, attr(data_normalized, "scaled:center"), "+")
avatars_tibble <- as_tibble(avatars_rescaled) %>%
mutate(haplotype = round(haplotype, digits=0),
cyp3A5D = round(cyp3A5D, digits=0),
sexe_r = round(sexe_r , digits=0),
sexe_d = round(sexe_d , digits=0),
rejet_aigu = round(rejet_aigu , digits=0),
event = round(event, digits=0)
# CYP3A4_1B = round(CYP3A4_1B, digits=0),
# MDR1_C1236T = round(MDR1_C1236T, digits=0),
# MDR1_G2677T = round(MDR1_G2677T, digits=0),
# MDR1_C3435T = round(MDR1_C3435T, digits=0)
)
}
iteration <- c(1:4)
set.seed(seed_value)
augmented_data_x <- map_dfr(iteration, data_augment_avatar, .id = "iter_")
augmented_data_x <- augmented_data_x %>%
select(-iter_)
###############
# Finally, fit the Cox model
fit <- coxph(Surv(delai_event, event) ~ haplotype + age_d + sexe_d + rejet_aigu,
data = augmented_data_x)
coefs <- fit$coefficients
hr <- exp(coefs)
return(data.frame(variable = names(hr), hr = hr))
# Calculate confidence intervals
# ci <- confint(fit)
# return(list(fit = fit, ci = ci))
}
# Generate a list of seed values
seed_value <- sample(x=100) # Modify this if you need different seed values
# Apply the algorithm with different seed values
model_results <- map(seed_value, run_model_with_seed)
# Extract HR and CI from model results
#extracted_results <- map(model_results, extract_hrs_and_cis)
# Combine results into a single data frame
combined_results <- bind_rows(model_results)
# Calculate median HR and CI for each variable
# aggregate_metrics <- combined_results %>%
# group_by(variable) %>%
# summarize(
# median_hr = median(hr),
# median_ci_lower = median(ci_lower),
# median_ci_upper = median(ci_upper)
# )
#
# aggregate_metrics
# Calculate the specified percentiles for HRs for each variable
percentile_metrics <- combined_results %>%
group_by(variable) %>%
summarize(
percentile_0 = quantile(hr, probs = 0),
percentile_5 = quantile(hr, probs = 0.05),
percentile_25 = quantile(hr, probs = 0.25),
percentile_50 = quantile(hr, probs = 0.5),
percentile_75 = quantile(hr, probs = 0.75),
percentile_95 = quantile(hr, probs = 0.95),
percentile_100 = quantile(hr, probs = 1)
) %>%
pivot_longer(-variable, names_to = "Percentile_HR", values_to = "Value_HR") %>%
mutate(Value_HR = round(Value_HR, 2))
# percentile_metrics
# datatable(percentile_metrics)
knitr::kable(percentile_metrics %>% mutate(Value_HR = round(Value_HR, 2)), "simple")
| variable | Percentile_HR | Value_HR |
|---|---|---|
| age_d | percentile_0 | 1.01 |
| age_d | percentile_5 | 1.02 |
| age_d | percentile_25 | 1.03 |
| age_d | percentile_50 | 1.04 |
| age_d | percentile_75 | 1.05 |
| age_d | percentile_95 | 1.07 |
| age_d | percentile_100 | 1.08 |
| haplotype | percentile_0 | 3.74 |
| haplotype | percentile_5 | 4.24 |
| haplotype | percentile_25 | 4.79 |
| haplotype | percentile_50 | 5.30 |
| haplotype | percentile_75 | 5.93 |
| haplotype | percentile_95 | 7.76 |
| haplotype | percentile_100 | 10.34 |
| rejet_aigu | percentile_0 | 1.48 |
| rejet_aigu | percentile_5 | 1.84 |
| rejet_aigu | percentile_25 | 2.65 |
| rejet_aigu | percentile_50 | 3.26 |
| rejet_aigu | percentile_75 | 3.83 |
| rejet_aigu | percentile_95 | 4.78 |
| rejet_aigu | percentile_100 | 5.95 |
| sexe_d | percentile_0 | 1.09 |
| sexe_d | percentile_5 | 1.50 |
| sexe_d | percentile_25 | 1.88 |
| sexe_d | percentile_50 | 2.21 |
| sexe_d | percentile_75 | 2.64 |
| sexe_d | percentile_95 | 3.58 |
| sexe_d | percentile_100 | 8.48 |
These data have been generated by Clement Benoist using the Synthcity python libnrary from the Van der Schaar lab
survae <- read_csv("sfpt24_survae_data_v240111.dat") %>%
select(haplotype:delai_event)
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
survae_factor <- survae %>%
mutate(haplotype = as.factor(haplotype),
cyp3A5D = as.factor(cyp3A5D),
sexe_r = as.factor(sexe_r),
sexe_d = as.factor(sexe_d),
# CYP3A4_1B = as.factor(CYP3A4_1B),
# MDR1_C1236T = as.factor(MDR1_C1236T),
# MDR1_G2677T = as.factor(MDR1_G2677T),
# MDR1_C3435T = as.factor(MDR1_C3435T),
rejet_aigu = as.factor(rejet_aigu))
summary(original)
## haplotype cyp3A5D age_r sexe_r age_d sexe_d
## autre: 97 Es : 42 Min. :19.00 F: 97 Min. :12.00 F: 79
## het :123 NEs:211 1st Qu.:44.00 M:156 1st Qu.:25.00 M:174
## hom : 33 Median :55.00 Median :40.00
## Mean :53.84 Mean :38.49
## 3rd Qu.:64.00 3rd Qu.:49.00
## Max. :78.00 Max. :73.00
## rejet_aigu TIF event delai_event
## 0:172 Min. : 303 Min. :0.00000 Min. : 0.680
## 1: 81 1st Qu.: 975 1st Qu.:0.00000 1st Qu.: 2.920
## Median :1153 Median :0.00000 Median : 5.340
## Mean :1199 Mean :0.08696 Mean : 6.044
## 3rd Qu.:1368 3rd Qu.:0.00000 3rd Qu.: 8.700
## Max. :2580 Max. :1.00000 Max. :15.830
summary(survae)
## haplotype cyp3A5D age_r sexe_r
## Min. :1.000 Min. :1.000 Min. :20.00 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:42.00 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :56.00 Median :2.000
## Mean :1.862 Mean :1.941 Mean :53.49 Mean :1.644
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:64.00 3rd Qu.:2.000
## Max. :3.000 Max. :2.000 Max. :78.00 Max. :2.000
## age_d sexe_d rejet_aigu TIF
## Min. :13.00 Min. :1.000 Min. :1.000 Min. : 588
## 1st Qu.:21.00 1st Qu.:2.000 1st Qu.:1.000 1st Qu.: 896
## Median :34.00 Median :2.000 Median :1.000 Median :1057
## Mean :32.97 Mean :1.779 Mean :1.292 Mean :1066
## 3rd Qu.:44.00 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1188
## Max. :60.00 Max. :2.000 Max. :2.000 Max. :1912
## event delai_event
## Min. :0.00000 Min. : 1.067
## 1st Qu.:0.00000 1st Qu.: 4.513
## Median :0.00000 Median : 6.130
## Mean :0.05534 Mean : 6.307
## 3rd Qu.:0.00000 3rd Qu.: 7.733
## Max. :1.00000 Max. :14.883
# Combine original and synthetic data for visualization
combined_data <- rbind(
original1 %>% mutate(DataType = 'Original'),
survae_factor %>% mutate(DataType = 'Synthetic')
) %>% mutate_if(is.character, factor)
## Vector of categorical variables that need transformation
catVars <- c("haplotype", "cyp3A5D", "sexe_r", "sexe_d",
"rejet_aigu", "event")
## Create a variable list.
vars <- c( "haplotype", "cyp3A5D", "age_r", "sexe_r", "age_d", "sexe_d",
"rejet_aigu", "TIF", "event", "delai_event", "DataType")
tableOne <- CreateTableOne(vars = vars, strata = "DataType",factorVars = catVars, data = combined_data)
tableOne2<-print(tableOne, nonnormal = c( "age_r", "age_d", "TIF", "delai_event"), printToggle=F, minMax=T)
| Original | Synthetic | p | test | |
|---|---|---|---|---|
| n | 253 | 253 | ||
| haplotype (%) | 0.047 | |||
| 1 | 97 (38.3) | 71 ( 28.1) | ||
| 2 | 123 (48.6) | 146 ( 57.7) | ||
| 3 | 33 (13.0) | 36 ( 14.2) | ||
| cyp3A5D = 2 (%) | 211 (83.4) | 238 ( 94.1) | <0.001 | |
| age_r (median [range]) | 55.00 [19.00, 78.00] | 56.00 [20.00, 78.00] | 0.820 | nonnorm |
| sexe_r = 2 (%) | 156 (61.7) | 163 ( 64.4) | 0.581 | |
| age_d (median [range]) | 40.00 [12.00, 73.00] | 34.00 [13.00, 60.00] | <0.001 | nonnorm |
| sexe_d = 2 (%) | 174 (68.8) | 197 ( 77.9) | 0.027 | |
| rejet_aigu = 2 (%) | 81 (32.0) | 74 ( 29.2) | 0.563 | |
| TIF (median [range]) | 1153.00 [303.00, 2580.00] | 1057.00 [588.00, 1912.00] | <0.001 | nonnorm |
| event = 1 (%) | 22 ( 8.7) | 14 ( 5.5) | 0.226 | |
| delai_event (median [range]) | 5.34 [0.68, 15.83] | 6.13 [1.07, 14.88] | 0.017 | nonnorm |
| DataType = Synthetic (%) | 0 ( 0.0) | 253 (100.0) | <0.001 |
# boxplots
plot_boxplot(combined_data , by ="DataType")
# histograms
# Function to create histogram for each continuous variable
plot_histograms <- function(data, var_name, group_var) {
ggplot(data, aes(x = !!sym(var_name), fill = !!sym(group_var))) +
geom_histogram(alpha = 0.5,show.legend = FALSE) +
labs(x = var_name, y = "Count") +
theme_minimal() +
ggtitle(paste(var_name))
}
# Using select_if to identify continuous variables and map to apply the function
plots <- combined_data %>%
select( -sexe_r,-sexe_d) %>%
select_if(is.numeric) %>%
names() %>%
map(~plot_histograms(combined_data, ., "DataType"))
# Optionally, print or arrange plots (e.g., using gridExtra or patchwork packages)
wrap_plots(plots)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##Correlation Analysis
cor_real <- cor(original1, use = "complete.obs")
cor_synthetic <- cor(survae, use = "complete.obs")
# plots
ggcorrplot(cor_real, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# plots
ggcorrplot(cor_synthetic, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.2112071 3.3575352 0.3463273 3.497 0.00047 ***
## cyp3A5D -1.2323909 0.2915946 0.5567303 -2.214 0.02685 *
## age_r -0.0039521 0.9960557 0.0187880 -0.210 0.83339
## sexe_r -0.0438849 0.9570641 0.4606422 -0.095 0.92410
## age_d 0.0360206 1.0366772 0.0203668 1.769 0.07696 .
## sexe_d 0.2786636 1.3213627 0.5181402 0.538 0.59070
## rejet_aigu 1.0124644 2.7523756 0.4804379 2.107 0.03508 *
## TIF -0.0002268 0.9997732 0.0005753 -0.394 0.69345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.3575 0.2978 1.70305 6.6193
## cyp3A5D 0.2916 3.4294 0.09792 0.8683
## age_r 0.9961 1.0040 0.96004 1.0334
## sexe_r 0.9571 1.0449 0.38801 2.3607
## age_d 1.0367 0.9646 0.99611 1.0789
## sexe_d 1.3214 0.7568 0.47861 3.6481
## rejet_aigu 2.7524 0.3633 1.07339 7.0576
## TIF 0.9998 1.0002 0.99865 1.0009
##
## Concordance= 0.758 (se = 0.054 )
## Likelihood ratio test= 24.6 on 8 df, p=0.002
## Wald test = 21.09 on 8 df, p=0.007
## Score (logrank) test = 25.84 on 8 df, p=0.001
ggforest(fit_original, data = original)
# synthetique
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = survae)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = survae)
##
## n= 253, number of events= 14
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 0.7102441 2.0344878 0.4369992 1.625 0.104
## cyp3A5D -0.6508903 0.5215812 1.1378964 -0.572 0.567
## age_r 0.0249004 1.0252130 0.0277982 0.896 0.370
## sexe_r -0.1058940 0.8995200 0.5575219 -0.190 0.849
## age_d 0.0387958 1.0395581 0.0264003 1.470 0.142
## sexe_d -0.3278550 0.7204675 0.6301131 -0.520 0.603
## rejet_aigu 0.2023881 1.2243231 0.5851686 0.346 0.729
## TIF 0.0006969 1.0006972 0.0010851 0.642 0.521
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 2.0345 0.4915 0.86393 4.791
## cyp3A5D 0.5216 1.9172 0.05607 4.852
## age_r 1.0252 0.9754 0.97085 1.083
## sexe_r 0.8995 1.1117 0.30161 2.683
## age_d 1.0396 0.9619 0.98714 1.095
## sexe_d 0.7205 1.3880 0.20954 2.477
## rejet_aigu 1.2243 0.8168 0.38887 3.855
## TIF 1.0007 0.9993 0.99857 1.003
##
## Concordance= 0.66 (se = 0.088 )
## Likelihood ratio test= 6.99 on 8 df, p=0.5
## Wald test = 6.69 on 8 df, p=0.6
## Score (logrank) test = 6.94 on 8 df, p=0.5
ggforest(fit_synthetique)
BootstepAIC synhtetic survae
boot.stepAIC(fit_synthetique, survae, B = 100, k=log(nrow(survae)))
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = survae)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 5.53 * df
##
## Covariates selected
## (%)
## Null 56
## age_d 21
## haplotype 19
## TIF 10
## sexe_d 7
## cyp3A5D 6
## rejet_aigu 3
## sexe_r 3
## age_r 2
##
## Coefficients Sign
## + (%) - (%)
## haplotype 100.00 0.00
## rejet_aigu 100.00 0.00
## age_d 95.24 4.76
## TIF 80.00 20.00
## age_r 50.00 50.00
## sexe_r 33.33 66.67
## sexe_d 14.29 85.71
## cyp3A5D 0.00 100.00
##
## Stat Significance
## (%)
## age_d 100.00
## age_r 100.00
## haplotype 100.00
## rejet_aigu 100.00
## sexe_r 100.00
## TIF 100.00
## sexe_d 85.71
## cyp3A5D 83.33
##
##
## The stepAIC() for the original data-set gave
## Call: coxph(formula = Surv(delai_event, event) ~ 1, data = survae)
##
## Null model
## log likelihood= -63.40256
## n= 253
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r +
## age_d + sexe_d + rejet_aigu + TIF
##
## Final Model:
## Surv(delai_event, event) ~ 1
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 6 -6.990853 164.0814
## 2 - sexe_r 1 0.03588011 7 -6.954973 158.5839
## 3 - rejet_aigu 1 0.11582350 8 -6.839149 153.1663
## 4 - sexe_d 1 0.25862268 9 -6.580527 147.8915
## 5 - cyp3A5D 1 0.33049617 10 -6.250031 142.6886
## 6 - TIF 1 0.42154575 11 -5.828485 137.5768
## 7 - age_r 1 0.90884864 12 -4.919636 132.9523
## 8 - haplotype 1 2.49230935 13 -2.427327 129.9112
## 9 - age_d 1 2.42732679 14 0.000000 126.8051
Final model original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + rejet_aigu , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + rejet_aigu,
## data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.1681 3.2160 0.3261 3.582 0.000341 ***
## rejet_aigu 0.9238 2.5188 0.4661 1.982 0.047482 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.216 0.3109 1.697 6.094
## rejet_aigu 2.519 0.3970 1.010 6.280
##
## Concordance= 0.732 (se = 0.05 )
## Likelihood ratio test= 18.24 on 2 df, p=1e-04
## Wald test = 17.44 on 2 df, p=2e-04
## Score (logrank) test = 19.29 on 2 df, p=6e-05
Final model synthetic
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + rejet_aigu , data = survae)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + rejet_aigu,
## data = survae)
##
## n= 253, number of events= 14
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 0.5636 1.7569 0.4190 1.345 0.179
## rejet_aigu 0.2708 1.3111 0.5664 0.478 0.633
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 1.757 0.5692 0.7728 3.994
## rejet_aigu 1.311 0.7627 0.4320 3.979
##
## Concordance= 0.581 (se = 0.081 )
## Likelihood ratio test= 1.88 on 2 df, p=0.4
## Wald test = 1.88 on 2 df, p=0.4
## Score (logrank) test = 1.9 on 2 df, p=0.4
Allow to define the variability range of HR for a given dataset (intra dataset variability)
# Set the seed for reproducibility
set.seed(12)
# Bootstrap the Cox model
boot_results <- boot(data=survae, statistic=cox_model, R=100)
# Convert bootstrap results to a data frame for ggplot2
boot_hrs <- exp(boot_results$t) # Convert log(HR) to HR
hr_data <- data.frame(HR=boot_hrs[,1])
# Calculate summary statistics
summary_stats <- quantile(hr_data$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1))
names(summary_stats) <- c("Min","2.5th", "5th", "25th", "Median", "75th", "95th","97.5th","Max")
# Create the histogram
# Create the histogram
ggplot(hr_data, aes(x=HR)) +
geom_histogram(bins=30, fill="#007a86", color="black") +
# # geom_vline(aes(xintercept=summary_stats["Min"]), color="red", linetype="dashed") +
# geom_vline(aes(xintercept=summary_stats["25th"][[1]][[1]]), color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Median"][[1]][[1]]), color="blue", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["75th"])[[1]][[1]], color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Max"]), color="purple", linetype="dashed") +
labs(title="Bootstrap Distribution of Hazard Ratios", x="Hazard Ratio (HR)", y="Frequency") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
# Print summary statistics
knitr::kable(summary_stats, "simple")
| x | |
|---|---|
| Min | 0.7351723 |
| 2.5th | 0.8378569 |
| 5th | 0.8928419 |
| 25th | 1.4116339 |
| Median | 1.8234747 |
| 75th | 2.5604567 |
| 95th | 4.1124517 |
| 97.5th | 4.6540383 |
| Max | 5.2444288 |
km_original <- survfit(Surv(delai_event, event) ~ haplotype, data = original)
ggsurvplot(
km_original,
data = original,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique <- survfit(Surv(delai_event, event) ~ haplotype, data = survae_factor)
km_synthetique_survae <- ggsurvplot(
km_synthetique,
data = survae_factor,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique_survae
Plots original & synthetic combined
## combine data
combined_df <- rbind(original %>% mutate(group = "original"), survae_factor %>% mutate(group = "synthetic")) %>% mutate(combined_haplotype = str_c(haplotype,"_", group ))
## fit the model
km_combined <- survfit(Surv(delai_event, event) ~ combined_haplotype, data = combined_df)
# plot
ggsurvplot(fit = km_combined,
data = combined_df,
size = 1, # change line size
conf.int = FALSE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.35, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
Graphical exploration of distribution
library(GGally)
pm_survae <- combined_df %>% select(haplotype:delai_event, group) %>% ggpairs(
ggplot2::aes(colour = group,alpha = 0.5),
upper = list(continuous = wrap("cor", size = 1.5)),
lower=list(combo=wrap("facethist", binwidth=0.5))) +
theme(strip.text.x = element_text(size = 5),
strip.text.y = element_text(size = 5),axis.text = element_text(size = 5))
pm_survae
# ggsave("comparaison_distribution_survae.pdf")
# Définir le répertoire où se trouvent les fichiers
repertoire <- "~/Documents/avatar/tvae_ctgan_variability/Gen_data_synth_for_bootstrap/Generate_graft_loss3_multi/Graft_loss_survae"
# Lire tous les fichiers CSV dans le répertoire
liste_donnees <- list.files(repertoire, pattern = "*.dat", full.names = TRUE) %>%
map(read_csv)
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Appliquer le modèle de Cox à chaque jeu de données
resultats <- map(liste_donnees, ~ coxph(Surv(delai_event, event) ~ haplotype + rejet_aigu, data = .x))
# Extraire les HR et les quantiles pour chaque variable
quantiles <- c(0, 5, 25, 50, 75, 95, 100)
hr_haplotype <- map(resultats, ~ tidy(.x, exponentiate = TRUE)) %>%
map_dfr(~ .x %>% filter(term == "haplotype") %>% select(estimate)) %>%
reframe(across(estimate, ~ quantile(., probs = quantiles/100))) %>%
mutate(quantiles = c(0, 5, 25, 50, 75, 95, 100), name = "haplotype")
hr_rejet_aigu <- map(resultats, ~ tidy(.x, exponentiate = TRUE)) %>%
map_dfr(~ .x %>% filter(term == "rejet_aigu") %>% select(estimate)) %>%
reframe(across(estimate, ~ quantile(., probs = quantiles/100))) %>%
mutate(quantiles = c(0, 5, 25, 50, 75, 95, 100), name = "rejet_aigu")
# Afficher et combiner les résultats
hr_results_tvae <- bind_rows(hr_haplotype, hr_rejet_aigu)
# Print summary statistics
knitr::kable(hr_results_tvae, "simple")
| estimate | quantiles | name |
|---|---|---|
| 2.097934e-01 | 0 | haplotype |
| 6.379875e-01 | 5 | haplotype |
| 1.243284e+00 | 25 | haplotype |
| 1.912866e+00 | 50 | haplotype |
| 2.641539e+00 | 75 | haplotype |
| 7.438773e+00 | 95 | haplotype |
| 1.959189e+09 | 100 | haplotype |
| 0.000000e+00 | 0 | rejet_aigu |
| 0.000000e+00 | 5 | rejet_aigu |
| 8.129948e-01 | 25 | rejet_aigu |
| 1.344378e+00 | 50 | rejet_aigu |
| 2.359905e+00 | 75 | rejet_aigu |
| 5.321860e+00 | 95 | rejet_aigu |
| 1.209022e+09 | 100 | rejet_aigu |
survae_augmented <- read_csv("sfpt24_survae_data_large_v240111.dat") %>%
select(haplotype:delai_event)
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
survae_augmented_factor <- survae_augmented %>%
mutate(haplotype = as.factor(haplotype),
cyp3A5D = as.factor(cyp3A5D),
sexe_r = as.factor(sexe_r),
sexe_d = as.factor(sexe_d),
# CYP3A4_1B = as.factor(CYP3A4_1B),
# MDR1_C1236T = as.factor(MDR1_C1236T),
# MDR1_G2677T = as.factor(MDR1_G2677T),
# MDR1_C3435T = as.factor(MDR1_C3435T),
rejet_aigu = as.factor(rejet_aigu))
summary(original)
## haplotype cyp3A5D age_r sexe_r age_d sexe_d
## autre: 97 Es : 42 Min. :19.00 F: 97 Min. :12.00 F: 79
## het :123 NEs:211 1st Qu.:44.00 M:156 1st Qu.:25.00 M:174
## hom : 33 Median :55.00 Median :40.00
## Mean :53.84 Mean :38.49
## 3rd Qu.:64.00 3rd Qu.:49.00
## Max. :78.00 Max. :73.00
## rejet_aigu TIF event delai_event
## 0:172 Min. : 303 Min. :0.00000 Min. : 0.680
## 1: 81 1st Qu.: 975 1st Qu.:0.00000 1st Qu.: 2.920
## Median :1153 Median :0.00000 Median : 5.340
## Mean :1199 Mean :0.08696 Mean : 6.044
## 3rd Qu.:1368 3rd Qu.:0.00000 3rd Qu.: 8.700
## Max. :2580 Max. :1.00000 Max. :15.830
summary(survae_augmented)
## haplotype cyp3A5D age_r sexe_r
## Min. :1.000 Min. :1.000 Min. :24.00 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:43.00 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :56.00 Median :2.000
## Mean :1.862 Mean :1.958 Mean :53.64 Mean :1.639
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:63.00 3rd Qu.:2.000
## Max. :3.000 Max. :2.000 Max. :78.00 Max. :2.000
## age_d sexe_d rejet_aigu TIF
## Min. :12.00 Min. :1.000 Min. :1.000 Min. : 565
## 1st Qu.:21.00 1st Qu.:2.000 1st Qu.:1.000 1st Qu.: 925
## Median :34.00 Median :2.000 Median :1.000 Median :1058
## Mean :33.44 Mean :1.781 Mean :1.286 Mean :1070
## 3rd Qu.:44.00 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1195
## Max. :66.00 Max. :2.000 Max. :2.000 Max. :2191
## event delai_event
## Min. :0.00000 Min. : 0.8223
## 1st Qu.:0.00000 1st Qu.: 4.5201
## Median :0.00000 Median : 6.2705
## Mean :0.05237 Mean : 6.3985
## 3rd Qu.:0.00000 3rd Qu.: 7.9969
## Max. :1.00000 Max. :15.7025
# Combine original and synthetic data for visualization
combined_data <- rbind(
original1 %>% mutate(DataType = 'Original'),
survae_augmented_factor %>% mutate(DataType = 'Synthetic')
) %>% mutate_if(is.character, factor)
## Vector of categorical variables that need transformation
catVars <- c("haplotype", "cyp3A5D", "sexe_r", "sexe_d",
"rejet_aigu", "event")
## Create a variable list.
vars <- c( "haplotype", "cyp3A5D", "age_r", "sexe_r", "age_d", "sexe_d",
"rejet_aigu", "TIF", "event", "delai_event", "DataType")
tableOne <- CreateTableOne(vars = vars, strata = "DataType",factorVars = catVars, data = combined_data)
tableOne2<-print(tableOne, nonnormal = c( "age_r", "age_d", "TIF", "delai_event"), printToggle=F, minMax=T)
| Original | Synthetic | p | test | |
|---|---|---|---|---|
| n | 253 | 1012 | ||
| haplotype (%) | 0.020 | |||
| 1 | 97 (38.3) | 296 ( 29.2) | ||
| 2 | 123 (48.6) | 560 ( 55.3) | ||
| 3 | 33 (13.0) | 156 ( 15.4) | ||
| cyp3A5D = 2 (%) | 211 (83.4) | 969 ( 95.8) | <0.001 | |
| age_r (median [range]) | 55.00 [19.00, 78.00] | 56.00 [24.00, 78.00] | 0.888 | nonnorm |
| sexe_r = 2 (%) | 156 (61.7) | 647 ( 63.9) | 0.549 | |
| age_d (median [range]) | 40.00 [12.00, 73.00] | 34.00 [12.00, 66.00] | <0.001 | nonnorm |
| sexe_d = 2 (%) | 174 (68.8) | 790 ( 78.1) | 0.003 | |
| rejet_aigu = 2 (%) | 81 (32.0) | 289 ( 28.6) | 0.315 | |
| TIF (median [range]) | 1153.00 [303.00, 2580.00] | 1058.00 [565.00, 2191.00] | <0.001 | nonnorm |
| event = 1 (%) | 22 ( 8.7) | 53 ( 5.2) | 0.053 | |
| delai_event (median [range]) | 5.34 [0.68, 15.83] | 6.27 [0.82, 15.70] | 0.002 | nonnorm |
| DataType = Synthetic (%) | 0 ( 0.0) | 1012 (100.0) | <0.001 |
# boxplots
plot_boxplot(combined_data , by ="DataType")
# histograms
# Function to create histogram for each continuous variable
plot_histograms <- function(data, var_name, group_var) {
ggplot(data, aes(x = !!sym(var_name), fill = !!sym(group_var))) +
geom_histogram(alpha = 0.5,show.legend = FALSE) +
labs(x = var_name, y = "Count") +
theme_minimal() +
ggtitle(paste(var_name))
}
# Using select_if to identify continuous variables and map to apply the function
plots <- combined_data %>%
select( -sexe_r,-sexe_d) %>%
select_if(is.numeric) %>%
names() %>%
map(~plot_histograms(combined_data, ., "DataType"))
# Optionally, print or arrange plots (e.g., using gridExtra or patchwork packages)
wrap_plots(plots)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##Correlation Analysis
cor_real <- cor(original1, use = "complete.obs")
cor_synthetic <- cor(survae_augmented, use = "complete.obs")
# plots
ggcorrplot(cor_real, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# plots
ggcorrplot(cor_synthetic, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.2112071 3.3575352 0.3463273 3.497 0.00047 ***
## cyp3A5D -1.2323909 0.2915946 0.5567303 -2.214 0.02685 *
## age_r -0.0039521 0.9960557 0.0187880 -0.210 0.83339
## sexe_r -0.0438849 0.9570641 0.4606422 -0.095 0.92410
## age_d 0.0360206 1.0366772 0.0203668 1.769 0.07696 .
## sexe_d 0.2786636 1.3213627 0.5181402 0.538 0.59070
## rejet_aigu 1.0124644 2.7523756 0.4804379 2.107 0.03508 *
## TIF -0.0002268 0.9997732 0.0005753 -0.394 0.69345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.3575 0.2978 1.70305 6.6193
## cyp3A5D 0.2916 3.4294 0.09792 0.8683
## age_r 0.9961 1.0040 0.96004 1.0334
## sexe_r 0.9571 1.0449 0.38801 2.3607
## age_d 1.0367 0.9646 0.99611 1.0789
## sexe_d 1.3214 0.7568 0.47861 3.6481
## rejet_aigu 2.7524 0.3633 1.07339 7.0576
## TIF 0.9998 1.0002 0.99865 1.0009
##
## Concordance= 0.758 (se = 0.054 )
## Likelihood ratio test= 24.6 on 8 df, p=0.002
## Wald test = 21.09 on 8 df, p=0.007
## Score (logrank) test = 25.84 on 8 df, p=0.001
ggforest(fit_original, data = original)
# synthetique
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = survae_augmented)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = survae_augmented)
##
## n= 1012, number of events= 53
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 7.989e-01 2.223e+00 2.264e-01 3.528 0.000419 ***
## cyp3A5D -1.329e+00 2.647e-01 4.163e-01 -3.192 0.001411 **
## age_r -1.650e-02 9.836e-01 1.250e-02 -1.320 0.186694
## sexe_r -3.120e-01 7.319e-01 2.897e-01 -1.077 0.281371
## age_d 2.036e-02 1.021e+00 1.472e-02 1.383 0.166705
## sexe_d -4.312e-01 6.497e-01 3.228e-01 -1.336 0.181657
## rejet_aigu 3.180e-01 1.374e+00 2.911e-01 1.092 0.274689
## TIF -4.687e-05 1.000e+00 7.057e-04 -0.066 0.947046
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 2.2231 0.4498 1.4263 3.4650
## cyp3A5D 0.2647 3.7779 0.1170 0.5986
## age_r 0.9836 1.0166 0.9598 1.0080
## sexe_r 0.7319 1.3662 0.4149 1.2914
## age_d 1.0206 0.9798 0.9915 1.0505
## sexe_d 0.6497 1.5391 0.3451 1.2233
## rejet_aigu 1.3744 0.7276 0.7768 2.4316
## TIF 1.0000 1.0000 0.9986 1.0013
##
## Concordance= 0.717 (se = 0.038 )
## Likelihood ratio test= 34.77 on 8 df, p=3e-05
## Wald test = 40.5 on 8 df, p=3e-06
## Score (logrank) test = 46.37 on 8 df, p=2e-07
ggforest(fit_synthetique)
BootstepAIC synhtetic augmented survae
boot.stepAIC(fit_synthetique, survae_augmented, B = 100, k=log(nrow(survae_augmented)))
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = survae_augmented)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 6.92 * df
##
## Covariates selected
## (%)
## haplotype 80
## cyp3A5D 65
## age_d 20
## rejet_aigu 11
## age_r 10
## sexe_d 10
## sexe_r 8
## Null 1
## TIF 1
##
## Coefficients Sign
## + (%) - (%)
## age_d 100 0
## haplotype 100 0
## rejet_aigu 100 0
## TIF 100 0
## age_r 0 100
## cyp3A5D 0 100
## sexe_d 0 100
## sexe_r 0 100
##
## Stat Significance
## (%)
## age_d 100
## age_r 100
## cyp3A5D 100
## haplotype 100
## rejet_aigu 100
## sexe_d 100
## sexe_r 100
## TIF 100
##
##
## The stepAIC() for the original data-set gave
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D,
## data = survae_augmented)
##
## coef exp(coef) se(coef) z p
## haplotype 0.7674 2.1542 0.2187 3.510 0.000449
## cyp3A5D -1.6254 0.1968 0.3899 -4.169 3.07e-05
##
## Likelihood ratio test=25.91 on 2 df, p=2.37e-06
## n= 1012, number of events= 53
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r +
## age_d + sexe_d + rejet_aigu + TIF
##
## Final Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 45 -34.77283 609.6722
## 2 - TIF 1 0.004421406 46 -34.76841 602.7569
## 3 - sexe_r 1 1.153922732 47 -33.61449 596.9912
## 4 - rejet_aigu 1 1.362272126 48 -32.25221 591.4338
## 5 - age_r 1 1.652731892 49 -30.59948 586.1668
## 6 - age_d 1 1.819471847 50 -28.78001 581.0666
## 7 - sexe_d 1 2.874402337 51 -25.90561 577.0213
Final model original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D,
## data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.1992 3.3174 0.3298 3.636 0.000277 ***
## cyp3A5D -0.8160 0.4422 0.5266 -1.550 0.121251
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.3174 0.3014 1.7380 6.332
## cyp3A5D 0.4422 2.2614 0.1575 1.241
##
## Concordance= 0.711 (se = 0.051 )
## Likelihood ratio test= 16.1 on 2 df, p=3e-04
## Wald test = 15.71 on 2 df, p=4e-04
## Score (logrank) test = 17.43 on 2 df, p=2e-04
Final model synthetic
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D , data = survae_augmented)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D,
## data = survae_augmented)
##
## n= 1012, number of events= 53
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 0.7674 2.1542 0.2187 3.510 0.000449 ***
## cyp3A5D -1.6254 0.1968 0.3899 -4.169 3.07e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 2.1542 0.4642 1.40333 3.3069
## cyp3A5D 0.1968 5.0807 0.09166 0.4227
##
## Concordance= 0.666 (se = 0.034 )
## Likelihood ratio test= 25.91 on 2 df, p=2e-06
## Wald test = 31.51 on 2 df, p=1e-07
## Score (logrank) test = 37.41 on 2 df, p=8e-09
Allow to define the variability range of HR for a given dataset (intra dataset variability)
# Define the Cox model
cox_model <- function(data, indices) {
d <- data[indices,] # allows bootstrapping to sample the data
fit <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D , data=d)
return(fit$coefficients)
}
# Set the seed for reproducibility
set.seed(12)
# Bootstrap the Cox model
boot_results <- boot(data=survae_augmented, statistic=cox_model, R=100)
# Calculate summary statistics
# Convert bootstrap results to a data frame for ggplot2
boot_hrs <- exp(boot_results$t) # Convert log(HR) to HR
hr_data_haplo <- data.frame(HR=boot_hrs[,1])
hr_data_cyp3A5D <- data.frame(HR=boot_hrs[,2])
# Calculate summary statistics
summary_stats <- quantile(hr_data_haplo$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) %>%
bind_rows(quantile(hr_data_cyp3A5D$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) )
names(summary_stats) <- c("Min","2.5th", "5th", "25th", "Median", "75th", "95th","97.5th","Max")
# Create the histogram
ggplot(hr_data_haplo, aes(x=HR)) +
geom_histogram(bins=30, fill="#007a86", color="black") +
# # geom_vline(aes(xintercept=summary_stats["Min"]), color="red", linetype="dashed") +
# geom_vline(aes(xintercept=summary_stats["25th"][[1]][[1]]), color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Median"][[1]][[1]]), color="blue", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["75th"])[[1]][[1]], color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Max"]), color="purple", linetype="dashed") +
labs(title="Bootstrap Distribution of Hazard Ratios", x="Hazard Ratio (HR)", y="Frequency") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
# Print summary statistics
knitr::kable(summary_stats, "simple")
| Min | 2.5th | 5th | 25th | Median | 75th | 95th | 97.5th | Max |
|---|---|---|---|---|---|---|---|---|
| 1.4951627 | 1.7074889 | 1.7441759 | 1.9988662 | 2.1810754 | 2.5054622 | 3.1842282 | 3.3852138 | 3.545362 |
| 0.0884964 | 0.1042443 | 0.1096196 | 0.1664911 | 0.2070725 | 0.2645651 | 0.3945493 | 0.4388291 | 1.388868 |
km_original <- survfit(Surv(delai_event, event) ~ haplotype, data = original)
ggsurvplot(
km_original,
data = original,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique <- survfit(Surv(delai_event, event) ~ haplotype, data = survae_augmented_factor)
km_synthetique_survae_augmented <- ggsurvplot(
km_synthetique,
data = survae_augmented_factor,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique_survae_augmented
Plots original & synthetic combined
## combine data
combined_df <- rbind(original %>% mutate(group = "original"), survae_augmented_factor %>% mutate(group = "synthetic")) %>% mutate(combined_haplotype = str_c(haplotype,"_", group ))
## fit the model
km_combined <- survfit(Surv(delai_event, event) ~ combined_haplotype, data = combined_df)
# plot
ggsurvplot(fit = km_combined,
data = combined_df,
size = 1, # change line size
conf.int = FALSE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.35, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
Graphical exploration of distribution
library(GGally)
pm_survae_augmented <- combined_df %>% select(haplotype:delai_event, group) %>% ggpairs(
ggplot2::aes(colour = group,alpha = 0.5),
upper = list(continuous = wrap("cor", size = 1.5)),
lower=list(combo=wrap("facethist", binwidth=0.5))) +
theme(strip.text.x = element_text(size = 5),
strip.text.y = element_text(size = 5),axis.text = element_text(size = 5))
pm_survae_augmented
# ggsave("comparaison_distribution_survae_augmented.pdf")
# Définir le répertoire où se trouvent les fichiers
repertoire <- "~/Documents/avatar/tvae_ctgan_variability/Gen_data_synth_for_bootstrap/Generate_graft_loss3_multi/Graft_loss_survae_large"
# Lire tous les fichiers CSV dans le répertoire
liste_donnees <- list.files(repertoire, pattern = "*.dat", full.names = TRUE) %>%
map(read_csv)
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Appliquer le modèle de Cox à chaque jeu de données
resultats <- map(liste_donnees, ~ coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D, data = .x))
# Extraire les HR et les quantiles pour chaque variable
quantiles <- c(0, 5, 25, 50, 75, 95, 100)
hr_haplotype <- map(resultats, ~ tidy(.x, exponentiate = TRUE)) %>%
map_dfr(~ .x %>% filter(term == "haplotype") %>% select(estimate)) %>%
reframe(across(estimate, ~ quantile(., probs = quantiles/100))) %>%
mutate(quantiles = c(0, 5, 25, 50, 75, 95, 100), name = "haplotype")
hr_cyp3A5D <- map(resultats, ~ tidy(.x, exponentiate = TRUE)) %>%
map_dfr(~ .x %>% filter(term == "cyp3A5D") %>% select(estimate)) %>%
reframe(across(estimate, ~ quantile(., probs = quantiles/100))) %>%
mutate(quantiles = c(0, 5, 25, 50, 75, 95, 100), name = "cyp3A5D")
# Afficher et combiner les résultats
hr_results_augmented_tvae <-bind_rows(hr_haplotype, hr_cyp3A5D)
# Print summary statistics
knitr::kable(hr_results_augmented_tvae, "simple")
| estimate | quantiles | name |
|---|---|---|
| 7.257720e-01 | 0 | haplotype |
| 9.756248e-01 | 5 | haplotype |
| 1.520401e+00 | 25 | haplotype |
| 1.953639e+00 | 50 | haplotype |
| 2.428046e+00 | 75 | haplotype |
| 3.550429e+00 | 95 | haplotype |
| 5.606424e+00 | 100 | haplotype |
| 1.317255e-01 | 0 | cyp3A5D |
| 2.197592e-01 | 5 | cyp3A5D |
| 3.530481e-01 | 25 | cyp3A5D |
| 6.635241e-01 | 50 | cyp3A5D |
| 1.103966e+00 | 75 | cyp3A5D |
| 2.304465e+07 | 95 | cyp3A5D |
| 2.929345e+07 | 100 | cyp3A5D |
These data have been generated by Clement Benoist using the Synthcity python libnrary from the Van der Schaar lab
survctgan <- read_csv("sfpt24_survctgan_data_v240111.dat") %>%
select(haplotype:delai_event)
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
survctgan_factor <- survctgan %>%
mutate(haplotype = as.factor(haplotype),
cyp3A5D = as.factor(cyp3A5D),
sexe_r = as.factor(sexe_r),
sexe_d = as.factor(sexe_d),
# CYP3A4_1B = as.factor(CYP3A4_1B),
# MDR1_C1236T = as.factor(MDR1_C1236T),
# MDR1_G2677T = as.factor(MDR1_G2677T),
# MDR1_C3435T = as.factor(MDR1_C3435T),
rejet_aigu = as.factor(rejet_aigu))
summary(original)
## haplotype cyp3A5D age_r sexe_r age_d sexe_d
## autre: 97 Es : 42 Min. :19.00 F: 97 Min. :12.00 F: 79
## het :123 NEs:211 1st Qu.:44.00 M:156 1st Qu.:25.00 M:174
## hom : 33 Median :55.00 Median :40.00
## Mean :53.84 Mean :38.49
## 3rd Qu.:64.00 3rd Qu.:49.00
## Max. :78.00 Max. :73.00
## rejet_aigu TIF event delai_event
## 0:172 Min. : 303 Min. :0.00000 Min. : 0.680
## 1: 81 1st Qu.: 975 1st Qu.:0.00000 1st Qu.: 2.920
## Median :1153 Median :0.00000 Median : 5.340
## Mean :1199 Mean :0.08696 Mean : 6.044
## 3rd Qu.:1368 3rd Qu.:0.00000 3rd Qu.: 8.700
## Max. :2580 Max. :1.00000 Max. :15.830
summary(survctgan)
## haplotype cyp3A5D age_r sexe_r
## Min. :1.000 Min. :1.000 Min. :36.00 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:54.00 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :68.00 Median :2.000
## Mean :1.953 Mean :1.913 Mean :64.31 Mean :1.672
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:75.00 3rd Qu.:2.000
## Max. :3.000 Max. :2.000 Max. :78.00 Max. :2.000
## age_d sexe_d rejet_aigu TIF
## Min. :19.00 Min. :1.000 Min. :1.000 Min. : 597
## 1st Qu.:28.00 1st Qu.:2.000 1st Qu.:1.000 1st Qu.: 893
## Median :38.00 Median :2.000 Median :1.000 Median :1051
## Mean :41.07 Mean :1.755 Mean :1.316 Mean :1066
## 3rd Qu.:54.00 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1203
## Max. :71.00 Max. :2.000 Max. :2.000 Max. :2115
## event delai_event
## Min. :0.0000 Min. : 0.9167
## 1st Qu.:0.0000 1st Qu.: 3.3024
## Median :0.0000 Median : 5.7500
## Mean :0.1067 Mean : 5.6764
## 3rd Qu.:0.0000 3rd Qu.: 7.5203
## Max. :1.0000 Max. :14.0767
# Combine original and synthetic data for visualization
combined_data <- rbind(
original1 %>% mutate(DataType = 'Original'),
survctgan_factor %>% mutate(DataType = 'Synthetic')
) %>% mutate_if(is.character, factor)
## Vector of categorical variables that need transformation
catVars <- c("haplotype", "cyp3A5D", "sexe_r", "sexe_d",
"rejet_aigu", "event")
## Create a variable list.
vars <- c( "haplotype", "cyp3A5D", "age_r", "sexe_r", "age_d", "sexe_d",
"rejet_aigu", "TIF", "event", "delai_event", "DataType")
tableOne <- CreateTableOne(vars = vars, strata = "DataType",factorVars = catVars, data = combined_data)
tableOne2<-print(tableOne, nonnormal = c( "age_r", "age_d", "TIF", "delai_event"), printToggle=F, minMax=T)
| Original | Synthetic | p | test | |
|---|---|---|---|---|
| n | 253 | 253 | ||
| haplotype (%) | 0.003 | |||
| 1 | 97 (38.3) | 64 ( 25.3) | ||
| 2 | 123 (48.6) | 137 ( 54.2) | ||
| 3 | 33 (13.0) | 52 ( 20.6) | ||
| cyp3A5D = 2 (%) | 211 (83.4) | 231 ( 91.3) | 0.011 | |
| age_r (median [range]) | 55.00 [19.00, 78.00] | 68.00 [36.00, 78.00] | <0.001 | nonnorm |
| sexe_r = 2 (%) | 156 (61.7) | 170 ( 67.2) | 0.227 | |
| age_d (median [range]) | 40.00 [12.00, 73.00] | 38.00 [19.00, 71.00] | 0.023 | nonnorm |
| sexe_d = 2 (%) | 174 (68.8) | 191 ( 75.5) | 0.113 | |
| rejet_aigu = 2 (%) | 81 (32.0) | 80 ( 31.6) | 1.000 | |
| TIF (median [range]) | 1153.00 [303.00, 2580.00] | 1051.00 [597.00, 2115.00] | <0.001 | nonnorm |
| event = 1 (%) | 22 ( 8.7) | 27 ( 10.7) | 0.548 | |
| delai_event (median [range]) | 5.34 [0.68, 15.83] | 5.75 [0.92, 14.08] | 0.958 | nonnorm |
| DataType = Synthetic (%) | 0 ( 0.0) | 253 (100.0) | <0.001 |
# boxplots
plot_boxplot(combined_data , by ="DataType")
# histograms
# Function to create histogram for each continuous variable
plot_histograms <- function(data, var_name, group_var) {
ggplot(data, aes(x = !!sym(var_name), fill = !!sym(group_var))) +
geom_histogram(alpha = 0.5,show.legend = FALSE) +
labs(x = var_name, y = "Count") +
theme_minimal() +
ggtitle(paste(var_name))
}
# Using select_if to identify continuous variables and map to apply the function
plots <- combined_data %>%
select( -sexe_r,-sexe_d) %>%
select_if(is.numeric) %>%
names() %>%
map(~plot_histograms(combined_data, ., "DataType"))
# Optionally, print or arrange plots (e.g., using gridExtra or patchwork packages)
wrap_plots(plots)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##Correlation Analysis
cor_real <- cor(original1, use = "complete.obs")
cor_synthetic <- cor(survctgan, use = "complete.obs")
# plots
ggcorrplot(cor_real, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# plots
ggcorrplot(cor_synthetic, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.2112071 3.3575352 0.3463273 3.497 0.00047 ***
## cyp3A5D -1.2323909 0.2915946 0.5567303 -2.214 0.02685 *
## age_r -0.0039521 0.9960557 0.0187880 -0.210 0.83339
## sexe_r -0.0438849 0.9570641 0.4606422 -0.095 0.92410
## age_d 0.0360206 1.0366772 0.0203668 1.769 0.07696 .
## sexe_d 0.2786636 1.3213627 0.5181402 0.538 0.59070
## rejet_aigu 1.0124644 2.7523756 0.4804379 2.107 0.03508 *
## TIF -0.0002268 0.9997732 0.0005753 -0.394 0.69345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.3575 0.2978 1.70305 6.6193
## cyp3A5D 0.2916 3.4294 0.09792 0.8683
## age_r 0.9961 1.0040 0.96004 1.0334
## sexe_r 0.9571 1.0449 0.38801 2.3607
## age_d 1.0367 0.9646 0.99611 1.0789
## sexe_d 1.3214 0.7568 0.47861 3.6481
## rejet_aigu 2.7524 0.3633 1.07339 7.0576
## TIF 0.9998 1.0002 0.99865 1.0009
##
## Concordance= 0.758 (se = 0.054 )
## Likelihood ratio test= 24.6 on 8 df, p=0.002
## Wald test = 21.09 on 8 df, p=0.007
## Score (logrank) test = 25.84 on 8 df, p=0.001
ggforest(fit_original)
# synthetique
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = survctgan)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = survctgan)
##
## n= 253, number of events= 27
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.2786307 3.5917181 0.3238779 3.948 7.88e-05 ***
## cyp3A5D -0.7519075 0.4714664 0.7914843 -0.950 0.342114
## age_r -0.0452639 0.9557453 0.0201697 -2.244 0.024823 *
## sexe_r 0.1131043 1.1197487 0.5025305 0.225 0.821925
## age_d 0.0685335 1.0709365 0.0203790 3.363 0.000771 ***
## sexe_d 0.0766062 1.0796168 0.5159812 0.148 0.881974
## rejet_aigu 0.1334569 1.1427720 0.4511890 0.296 0.767391
## TIF 0.0009676 1.0009681 0.0008226 1.176 0.239506
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.5917 0.2784 1.90378 6.7762
## cyp3A5D 0.4715 2.1210 0.09994 2.2241
## age_r 0.9557 1.0463 0.91870 0.9943
## sexe_r 1.1197 0.8931 0.41818 2.9983
## age_d 1.0709 0.9338 1.02900 1.1146
## sexe_d 1.0796 0.9263 0.39270 2.9681
## rejet_aigu 1.1428 0.8751 0.47196 2.7670
## TIF 1.0010 0.9990 0.99936 1.0026
##
## Concordance= 0.853 (se = 0.031 )
## Likelihood ratio test= 34.62 on 8 df, p=3e-05
## Wald test = 31.03 on 8 df, p=1e-04
## Score (logrank) test = 35.96 on 8 df, p=2e-05
ggforest(fit_synthetique)
BootstepAIC synhtetic ctGAN
boot.stepAIC(fit_synthetique, survctgan, B = 100, k=log(nrow(survctgan)))
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = survctgan)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 5.53 * df
##
## Covariates selected
## (%)
## haplotype 99
## age_d 77
## age_r 54
## cyp3A5D 20
## TIF 15
## sexe_d 12
## rejet_aigu 5
## sexe_r 1
##
## Coefficients Sign
## + (%) - (%)
## age_d 100.00 0.00
## haplotype 100.00 0.00
## sexe_r 100.00 0.00
## TIF 93.33 6.67
## rejet_aigu 60.00 40.00
## sexe_d 50.00 50.00
## cyp3A5D 10.00 90.00
## age_r 0.00 100.00
##
## Stat Significance
## (%)
## age_d 100
## age_r 100
## haplotype 100
## rejet_aigu 100
## sexe_d 100
## sexe_r 100
## TIF 100
## cyp3A5D 95
##
##
## The stepAIC() for the original data-set gave
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + age_r +
## age_d, data = survctgan)
##
## coef exp(coef) se(coef) z p
## haplotype 1.29286 3.64319 0.32963 3.922 8.78e-05
## age_r -0.04451 0.95646 0.01767 -2.520 0.011748
## age_d 0.06816 1.07054 0.01991 3.424 0.000617
##
## Likelihood ratio test=32.16 on 3 df, p=4.85e-07
## n= 253, number of events= 27
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r +
## age_d + sexe_d + rejet_aigu + TIF
##
## Final Model:
## Surv(delai_event, event) ~ haplotype + age_r + age_d
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 19 -34.62360 225.9449
## 2 - sexe_d 1 0.02220719 20 -34.60139 220.4337
## 3 - sexe_r 1 0.05105628 21 -34.55033 214.9514
## 4 - rejet_aigu 1 0.07853818 22 -34.47179 209.4965
## 5 - cyp3A5D 1 0.72079825 23 -33.75100 204.6839
## 6 - TIF 1 1.59409108 24 -32.15691 200.7446
Final model original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + rejet_aigu , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + rejet_aigu,
## data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.1681 3.2160 0.3261 3.582 0.000341 ***
## rejet_aigu 0.9238 2.5188 0.4661 1.982 0.047482 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.216 0.3109 1.697 6.094
## rejet_aigu 2.519 0.3970 1.010 6.280
##
## Concordance= 0.732 (se = 0.05 )
## Likelihood ratio test= 18.24 on 2 df, p=1e-04
## Wald test = 17.44 on 2 df, p=2e-04
## Score (logrank) test = 19.29 on 2 df, p=6e-05
Final model synthetic
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + age_r+ age_d , data = survctgan)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + age_r +
## age_d, data = survctgan)
##
## n= 253, number of events= 27
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.29286 3.64319 0.32963 3.922 8.78e-05 ***
## age_r -0.04451 0.95646 0.01767 -2.520 0.011748 *
## age_d 0.06816 1.07054 0.01991 3.424 0.000617 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.6432 0.2745 1.9094 6.9512
## age_r 0.9565 1.0455 0.9239 0.9902
## age_d 1.0705 0.9341 1.0296 1.1131
##
## Concordance= 0.823 (se = 0.041 )
## Likelihood ratio test= 32.16 on 3 df, p=5e-07
## Wald test = 29.2 on 3 df, p=2e-06
## Score (logrank) test = 33.33 on 3 df, p=3e-07
Allow to define the variability range of HR for a given dataset (intra dataset variability)
# Define the Cox model
cox_model <- function(data, indices) {
d <- data[indices,] # allows bootstrapping to sample the data
fit <- coxph(Surv(delai_event, event) ~ haplotype + age_r+ age_d , data=d)
return(fit$coefficients)
}
# Set the seed for reproducibility
set.seed(12)
# Bootstrap the Cox model
boot_results <- boot(data=survctgan, statistic=cox_model, R=100)
# Convert bootstrap results to a data frame for ggplot2
boot_hrs <- exp(boot_results$t) # Convert log(HR) to HR
hr_data_haplo <- data.frame(HR=boot_hrs[,1])
hr_data_age_r <- data.frame(HR=boot_hrs[,2])
hr_data_age_d <- data.frame(HR=boot_hrs[,3])
# Calculate summary statistics
summary_stats <- quantile(hr_data_haplo$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) %>%
bind_rows(quantile(hr_data_age_r$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) ) %>%
bind_rows(quantile(hr_data_age_d$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) )
names(summary_stats) <- c("Min","2.5th", "5th", "25th", "Median", "75th", "95th","97.5th","Max")
# Create the histogram
ggplot(hr_data_haplo, aes(x=HR)) +
geom_histogram(bins=30, fill="#007a86", color="black") +
# # geom_vline(aes(xintercept=summary_stats["Min"]), color="red", linetype="dashed") +
# geom_vline(aes(xintercept=summary_stats["25th"][[1]][[1]]), color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Median"][[1]][[1]]), color="blue", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["75th"])[[1]][[1]], color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Max"]), color="purple", linetype="dashed") +
labs(title="Bootstrap Distribution of Hazard Ratios", x="Hazard Ratio (HR)", y="Frequency") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
# Print summary statistics
knitr::kable(summary_stats, "simple")
| Min | 2.5th | 5th | 25th | Median | 75th | 95th | 97.5th | Max |
|---|---|---|---|---|---|---|---|---|
| 1.9598241 | 2.1502791 | 2.3740937 | 3.2491720 | 4.4621210 | 6.1153594 | 11.491354 | 14.2115888 | 26.5526342 |
| 0.9127347 | 0.9180034 | 0.9225346 | 0.9386579 | 0.9515207 | 0.9615791 | 0.979446 | 0.9811417 | 0.9880432 |
| 1.0163727 | 1.0259490 | 1.0306551 | 1.0529889 | 1.0655245 | 1.0829997 | 1.101860 | 1.1040577 | 1.1096139 |
km_original <- survfit(Surv(delai_event, event) ~ haplotype, data = original)
ggsurvplot(
km_original,
data = original,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique <- survfit(Surv(delai_event, event) ~ haplotype, data = survctgan_factor)
km_synthetique_ctgan <- ggsurvplot(
km_synthetique,
data = survctgan_factor,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique_ctgan
Plots original & synthetic combined
## combine data
combined_df <- rbind(original %>% mutate(group = "original"), survctgan_factor %>% mutate(group = "synthetic")) %>% mutate(combined_haplotype = str_c(haplotype,"_", group ))
## fit the model
km_combined <- survfit(Surv(delai_event, event) ~ combined_haplotype, data = combined_df)
# plot
ggsurvplot(fit = km_combined,
data = combined_df,
size = 1, # change line size
conf.int = FALSE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.35, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
Graphical exploraiotn of distribution
library(GGally)
pm_ctgan <- combined_df %>% select(haplotype:delai_event, group) %>% ggpairs(
ggplot2::aes(colour = group,alpha = 0.5),
upper = list(continuous = wrap("cor", size = 1.5)),
lower=list(combo=wrap("facethist", binwidth=0.5))) +
theme(strip.text.x = element_text(size = 5),
strip.text.y = element_text(size = 5),axis.text = element_text(size = 5))
pm_ctgan
# ggsave("comparaison_distribution_survctgan.pdf")
# Définir le répertoire où se trouvent les fichiers
repertoire <- "~/Documents/avatar/tvae_ctgan_variability/Gen_data_synth_for_bootstrap/Generate_graft_loss3_multi/Graft_loss_surv_ctgan"
# Lire tous les fichiers CSV dans le répertoire
liste_donnees <- list.files(repertoire, pattern = "*.dat", full.names = TRUE) %>%
map(read_csv)
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 253 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Appliquer le modèle de Cox à chaque jeu de données
resultats <- map(liste_donnees, ~ coxph(Surv(delai_event, event) ~ haplotype + age_r+ age_d , data = .x))
# Extraire les HR et les quantiles pour chaque variable
quantiles <- c(0, 5, 25, 50, 75, 95, 100)
hr_haplotype <- map(resultats, ~ tidy(.x, exponentiate = TRUE)) %>%
map_dfr(~ .x %>% filter(term == "haplotype") %>% select(estimate)) %>%
reframe(across(estimate, ~ quantile(., probs = quantiles/100))) %>%
mutate(quantiles = c(0, 5, 25, 50, 75, 95, 100), name = "haplotype")
hr_age_r <- map(resultats, ~ tidy(.x, exponentiate = TRUE)) %>%
map_dfr(~ .x %>% filter(term == "age_r") %>% select(estimate)) %>%
reframe(across(estimate, ~ quantile(., probs = quantiles/100))) %>%
mutate(quantiles = c(0, 5, 25, 50, 75, 95, 100), name = "age_r")
hr_age_d <- map(resultats, ~ tidy(.x, exponentiate = TRUE)) %>%
map_dfr(~ .x %>% filter(term == "age_d") %>% select(estimate)) %>%
reframe(across(estimate, ~ quantile(., probs = quantiles/100))) %>%
mutate(quantiles = c(0, 5, 25, 50, 75, 95, 100), name = "age_d")
# Afficher et combiner les résultats
hr_results_ctgan <- bind_rows(hr_haplotype, hr_age_r,hr_age_d)
# Print summary statistics
knitr::kable(hr_results_ctgan, "simple")
| estimate | quantiles | name |
|---|---|---|
| 0.8235224 | 0 | haplotype |
| 1.2077354 | 5 | haplotype |
| 1.9582998 | 25 | haplotype |
| 2.5582018 | 50 | haplotype |
| 3.6106090 | 75 | haplotype |
| 6.6765699 | 95 | haplotype |
| 27.5221584 | 100 | haplotype |
| 0.8925233 | 0 | age_r |
| 0.9286848 | 5 | age_r |
| 0.9597140 | 25 | age_r |
| 0.9810758 | 50 | age_r |
| 0.9977011 | 75 | age_r |
| 1.0243994 | 95 | age_r |
| 1.0620212 | 100 | age_r |
| 0.9093644 | 0 | age_d |
| 0.9707534 | 5 | age_d |
| 1.0114126 | 25 | age_d |
| 1.0378943 | 50 | age_d |
| 1.0654632 | 75 | age_d |
| 1.1153437 | 95 | age_d |
| 1.1613914 | 100 | age_d |
survctgan_augmented <- read_csv("sfpt24_survctgan_data_large_v240111.dat") %>%
select(haplotype:delai_event)
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
survctgan_augmented_factor <- survctgan_augmented %>%
mutate(haplotype = as.factor(haplotype),
cyp3A5D = as.factor(cyp3A5D),
sexe_r = as.factor(sexe_r),
sexe_d = as.factor(sexe_d),
# CYP3A4_1B = as.factor(CYP3A4_1B),
# MDR1_C1236T = as.factor(MDR1_C1236T),
# MDR1_G2677T = as.factor(MDR1_G2677T),
# MDR1_C3435T = as.factor(MDR1_C3435T),
rejet_aigu = as.factor(rejet_aigu))
summary(original)
## haplotype cyp3A5D age_r sexe_r age_d sexe_d
## autre: 97 Es : 42 Min. :19.00 F: 97 Min. :12.00 F: 79
## het :123 NEs:211 1st Qu.:44.00 M:156 1st Qu.:25.00 M:174
## hom : 33 Median :55.00 Median :40.00
## Mean :53.84 Mean :38.49
## 3rd Qu.:64.00 3rd Qu.:49.00
## Max. :78.00 Max. :73.00
## rejet_aigu TIF event delai_event
## 0:172 Min. : 303 Min. :0.00000 Min. : 0.680
## 1: 81 1st Qu.: 975 1st Qu.:0.00000 1st Qu.: 2.920
## Median :1153 Median :0.00000 Median : 5.340
## Mean :1199 Mean :0.08696 Mean : 6.044
## 3rd Qu.:1368 3rd Qu.:0.00000 3rd Qu.: 8.700
## Max. :2580 Max. :1.00000 Max. :15.830
summary(survctgan_augmented)
## haplotype cyp3A5D age_r sexe_r age_d
## Min. :1.000 Min. :1.00 Min. :19.00 Min. :1.000 Min. :12.00
## 1st Qu.:1.000 1st Qu.:2.00 1st Qu.:40.00 1st Qu.:1.000 1st Qu.:21.00
## Median :2.000 Median :2.00 Median :55.00 Median :2.000 Median :30.00
## Mean :1.845 Mean :1.83 Mean :51.89 Mean :1.642 Mean :31.99
## 3rd Qu.:2.000 3rd Qu.:2.00 3rd Qu.:63.00 3rd Qu.:2.000 3rd Qu.:42.00
## Max. :3.000 Max. :2.00 Max. :78.00 Max. :2.000 Max. :58.00
## sexe_d rejet_aigu TIF event
## Min. :1.000 Min. :1.000 Min. : 303.0 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.: 826.8 1st Qu.:0.0000
## Median :2.000 Median :1.000 Median : 993.0 Median :0.0000
## Mean :1.713 Mean :1.327 Mean :1020.8 Mean :0.1354
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1174.8 3rd Qu.:0.0000
## Max. :2.000 Max. :2.000 Max. :2580.0 Max. :1.0000
## delai_event
## Min. : 1.022
## 1st Qu.: 4.044
## Median : 5.876
## Mean : 6.026
## 3rd Qu.: 7.743
## Max. :15.119
# Combine original and synthetic data for visualization
combined_data <- rbind(
original1 %>% mutate(DataType = 'Original'),
survctgan_augmented_factor %>% mutate(DataType = 'Synthetic')
) %>% mutate_if(is.character, factor)
## Vector of categorical variables that need transformation
catVars <- c("haplotype", "cyp3A5D", "sexe_r", "sexe_d",
"rejet_aigu", "event")
## Create a variable list.
vars <- c( "haplotype", "cyp3A5D", "age_r", "sexe_r", "age_d", "sexe_d",
"rejet_aigu", "TIF", "event", "delai_event", "DataType")
tableOne <- CreateTableOne(vars = vars, strata = "DataType",factorVars = catVars, data = combined_data)
tableOne2<-print(tableOne, nonnormal = c( "age_r", "age_d", "TIF", "delai_event"), printToggle=F, minMax=T)
| Original | Synthetic | p | test | |
|---|---|---|---|---|
| n | 253 | 1012 | ||
| haplotype (%) | 0.108 | |||
| 1 | 97 (38.3) | 342 ( 33.8) | ||
| 2 | 123 (48.6) | 485 ( 47.9) | ||
| 3 | 33 (13.0) | 185 ( 18.3) | ||
| cyp3A5D = 2 (%) | 211 (83.4) | 840 ( 83.0) | 0.955 | |
| age_r (median [range]) | 55.00 [19.00, 78.00] | 55.00 [19.00, 78.00] | 0.119 | nonnorm |
| sexe_r = 2 (%) | 156 (61.7) | 650 ( 64.2) | 0.492 | |
| age_d (median [range]) | 40.00 [12.00, 73.00] | 30.00 [12.00, 58.00] | <0.001 | nonnorm |
| sexe_d = 2 (%) | 174 (68.8) | 722 ( 71.3) | 0.467 | |
| rejet_aigu = 2 (%) | 81 (32.0) | 331 ( 32.7) | 0.893 | |
| TIF (median [range]) | 1153.00 [303.00, 2580.00] | 993.00 [303.00, 2580.00] | <0.001 | nonnorm |
| event = 1 (%) | 22 ( 8.7) | 137 ( 13.5) | 0.049 | |
| delai_event (median [range]) | 5.34 [0.68, 15.83] | 5.88 [1.02, 15.12] | 0.102 | nonnorm |
| DataType = Synthetic (%) | 0 ( 0.0) | 1012 (100.0) | <0.001 |
# boxplots
plot_boxplot(combined_data , by ="DataType")
# histograms
# Function to create histogram for each continuous variable
plot_histograms <- function(data, var_name, group_var) {
ggplot(data, aes(x = !!sym(var_name), fill = !!sym(group_var))) +
geom_histogram(alpha = 0.5,show.legend = FALSE) +
labs(x = var_name, y = "Count") +
theme_minimal() +
ggtitle(paste(var_name))
}
# Using select_if to identify continuous variables and map to apply the function
plots <- combined_data %>%
select( -sexe_r,-sexe_d) %>%
select_if(is.numeric) %>%
names() %>%
map(~plot_histograms(combined_data, ., "DataType"))
# Optionally, print or arrange plots (e.g., using gridExtra or patchwork packages)
wrap_plots(plots)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##Correlation Analysis
cor_real <- cor(original1, use = "complete.obs")
cor_synthetic <- cor(survctgan_augmented, use = "complete.obs")
# plots
ggcorrplot(cor_real, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# plots
ggcorrplot(cor_synthetic, hc.order = TRUE, type = "lower",
lab = TRUE, pch.cex = 5,
tl.cex = 6, lab_size = 2)
# original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.2112071 3.3575352 0.3463273 3.497 0.00047 ***
## cyp3A5D -1.2323909 0.2915946 0.5567303 -2.214 0.02685 *
## age_r -0.0039521 0.9960557 0.0187880 -0.210 0.83339
## sexe_r -0.0438849 0.9570641 0.4606422 -0.095 0.92410
## age_d 0.0360206 1.0366772 0.0203668 1.769 0.07696 .
## sexe_d 0.2786636 1.3213627 0.5181402 0.538 0.59070
## rejet_aigu 1.0124644 2.7523756 0.4804379 2.107 0.03508 *
## TIF -0.0002268 0.9997732 0.0005753 -0.394 0.69345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.3575 0.2978 1.70305 6.6193
## cyp3A5D 0.2916 3.4294 0.09792 0.8683
## age_r 0.9961 1.0040 0.96004 1.0334
## sexe_r 0.9571 1.0449 0.38801 2.3607
## age_d 1.0367 0.9646 0.99611 1.0789
## sexe_d 1.3214 0.7568 0.47861 3.6481
## rejet_aigu 2.7524 0.3633 1.07339 7.0576
## TIF 0.9998 1.0002 0.99865 1.0009
##
## Concordance= 0.758 (se = 0.054 )
## Likelihood ratio test= 24.6 on 8 df, p=0.002
## Wald test = 21.09 on 8 df, p=0.007
## Score (logrank) test = 25.84 on 8 df, p=0.001
ggforest(fit_original)
# synthetique
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF , data = survctgan_augmented)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = survctgan_augmented)
##
## n= 1012, number of events= 137
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 7.812e-01 2.184e+00 1.330e-01 5.876 4.21e-09 ***
## cyp3A5D -1.336e+00 2.628e-01 1.940e-01 -6.889 5.61e-12 ***
## age_r -2.002e-02 9.802e-01 7.835e-03 -2.555 0.01063 *
## sexe_r -3.167e-01 7.285e-01 1.927e-01 -1.644 0.10020
## age_d 6.554e-02 1.068e+00 9.929e-03 6.601 4.07e-11 ***
## sexe_d 2.623e-01 1.300e+00 1.921e-01 1.366 0.17205
## rejet_aigu 5.571e-01 1.746e+00 1.902e-01 2.928 0.00341 **
## TIF 6.576e-05 1.000e+00 2.986e-04 0.220 0.82568
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 2.1841 0.4579 1.6831 2.8343
## cyp3A5D 0.2628 3.8055 0.1797 0.3843
## age_r 0.9802 1.0202 0.9652 0.9954
## sexe_r 0.7285 1.3726 0.4994 1.0628
## age_d 1.0677 0.9366 1.0472 1.0887
## sexe_d 1.2999 0.7693 0.8921 1.8942
## rejet_aigu 1.7456 0.5729 1.2023 2.5344
## TIF 1.0001 0.9999 0.9995 1.0007
##
## Concordance= 0.8 (se = 0.022 )
## Likelihood ratio test= 198.1 on 8 df, p=<2e-16
## Wald test = 187.2 on 8 df, p=<2e-16
## Score (logrank) test = 240.2 on 8 df, p=<2e-16
ggforest(fit_synthetique)
BootstepAIC synhtetic augmented ctGAN
boot.stepAIC(fit_synthetique, survctgan_augmented, B = 100, k=log(nrow(survctgan_augmented)))
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_r + sexe_r + age_d + sexe_d + rejet_aigu + TIF, data = survctgan_augmented)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 6.92 * df
##
## Covariates selected
## (%)
## age_d 100
## cyp3A5D 100
## haplotype 100
## rejet_aigu 60
## age_r 35
## sexe_d 11
## sexe_r 7
##
## Coefficients Sign
## + (%) - (%)
## age_d 100 0
## haplotype 100 0
## rejet_aigu 100 0
## sexe_d 100 0
## age_r 0 100
## cyp3A5D 0 100
## sexe_r 0 100
##
## Stat Significance
## (%)
## age_d 100
## age_r 100
## cyp3A5D 100
## haplotype 100
## rejet_aigu 100
## sexe_d 100
## sexe_r 100
##
##
## The stepAIC() for the original data-set gave
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_d, data = survctgan_augmented)
##
## coef exp(coef) se(coef) z p
## haplotype 0.801802 2.229554 0.123137 6.511 7.44e-11
## cyp3A5D -1.437209 0.237590 0.182105 -7.892 2.97e-15
## age_d 0.060010 1.061847 0.009173 6.542 6.08e-11
##
## Likelihood ratio test=181.3 on 3 df, p=< 2.2e-16
## n= 1012, number of events= 137
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_r + sexe_r +
## age_d + sexe_d + rejet_aigu + TIF
##
## Final Model:
## Surv(delai_event, event) ~ haplotype + cyp3A5D + age_d
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 129 -198.0495 1466.889
## 2 - TIF 1 0.04805559 130 -198.0015 1460.017
## 3 - sexe_d 1 1.88958696 131 -196.1119 1454.987
## 4 - sexe_r 1 2.13631707 132 -193.9756 1450.204
## 5 - age_r 1 5.78800702 133 -188.1876 1449.072
## 6 - rejet_aigu 1 6.83421403 134 -181.3533 1448.986
Final model original
fit_original <- coxph(Surv(delai_event, event) ~ haplotype + rejet_aigu , data = original1)
summary(fit_original)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + rejet_aigu,
## data = original1)
##
## n= 253, number of events= 22
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 1.1681 3.2160 0.3261 3.582 0.000341 ***
## rejet_aigu 0.9238 2.5188 0.4661 1.982 0.047482 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 3.216 0.3109 1.697 6.094
## rejet_aigu 2.519 0.3970 1.010 6.280
##
## Concordance= 0.732 (se = 0.05 )
## Likelihood ratio test= 18.24 on 2 df, p=1e-04
## Wald test = 17.44 on 2 df, p=2e-04
## Score (logrank) test = 19.29 on 2 df, p=6e-05
Final model synthetic
fit_synthetique <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D+ age_d , data = survctgan_augmented)
summary(fit_synthetique)
## Call:
## coxph(formula = Surv(delai_event, event) ~ haplotype + cyp3A5D +
## age_d, data = survctgan_augmented)
##
## n= 1012, number of events= 137
##
## coef exp(coef) se(coef) z Pr(>|z|)
## haplotype 0.801802 2.229554 0.123137 6.511 7.44e-11 ***
## cyp3A5D -1.437209 0.237590 0.182105 -7.892 2.97e-15 ***
## age_d 0.060010 1.061847 0.009173 6.542 6.08e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## haplotype 2.2296 0.4485 1.7515 2.8381
## cyp3A5D 0.2376 4.2089 0.1663 0.3395
## age_d 1.0618 0.9418 1.0429 1.0811
##
## Concordance= 0.792 (se = 0.022 )
## Likelihood ratio test= 181.3 on 3 df, p=<2e-16
## Wald test = 174.4 on 3 df, p=<2e-16
## Score (logrank) test = 219 on 3 df, p=<2e-16
Allow to define the variability range of HR for a given dataset (intra dataset variability)
# Define the Cox model
cox_model <- function(data, indices) {
d <- data[indices,] # allows bootstrapping to sample the data
fit <- coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D+ age_d , data=d)
return(fit$coefficients)
}
# Set the seed for reproducibility
set.seed(12)
# Bootstrap the Cox model
boot_results <- boot(data=survctgan_augmented, statistic=cox_model, R=100)
# Convert bootstrap results to a data frame for ggplot2
boot_hrs <- exp(boot_results$t) # Convert log(HR) to HR
hr_data_haplo <- data.frame(HR=boot_hrs[,1])
hr_data_cyp3A5D <- data.frame(HR=boot_hrs[,2])
hr_data_age_d <- data.frame(HR=boot_hrs[,3])
# Calculate summary statistics
summary_stats <- quantile(hr_data_haplo$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) %>%
bind_rows(quantile(hr_data_cyp3A5D$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) ) %>%
bind_rows(quantile(hr_data_age_d$HR, probs = c(0, 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975, 1)) )
names(summary_stats) <- c("Min","2.5th", "5th", "25th", "Median", "75th", "95th","97.5th","Max")
# Create the histogram
ggplot(hr_data_haplo, aes(x=HR)) +
geom_histogram(bins=30, fill="#007a86", color="black") +
# # geom_vline(aes(xintercept=summary_stats["Min"]), color="red", linetype="dashed") +
# geom_vline(aes(xintercept=summary_stats["25th"][[1]][[1]]), color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Median"][[1]][[1]]), color="blue", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["75th"])[[1]][[1]], color="gray", linetype="dashed", linewidth=2) +
# geom_vline(aes(xintercept=summary_stats["Max"]), color="purple", linetype="dashed") +
labs(title="Bootstrap Distribution of Hazard Ratios", x="Hazard Ratio (HR)", y="Frequency") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
# Print summary statistics
knitr::kable(summary_stats, "simple")
| Min | 2.5th | 5th | 25th | Median | 75th | 95th | 97.5th | Max |
|---|---|---|---|---|---|---|---|---|
| 1.6033717 | 1.7518195 | 1.8077805 | 2.1068108 | 2.2414160 | 2.4216252 | 2.7144741 | 2.8133390 | 3.0197868 |
| 0.1489253 | 0.1669671 | 0.1798056 | 0.2064424 | 0.2305468 | 0.2583895 | 0.3102633 | 0.3266667 | 0.4055641 |
| 1.0333989 | 1.0347498 | 1.0369765 | 1.0538198 | 1.0626267 | 1.0676404 | 1.0801418 | 1.0862602 | 1.0930407 |
km_original <- survfit(Surv(delai_event, event) ~ haplotype, data = original)
ggsurvplot(
km_original,
data = original,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique <- survfit(Surv(delai_event, event) ~ haplotype, data = survctgan_augmented_factor)
km_synthetique_ctgan_augmented <- ggsurvplot(
km_synthetique,
data = survctgan_augmented_factor,
size = 1, # change line size
conf.int = TRUE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.25, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
km_synthetique_ctgan_augmented
Plots original & synthetic combined
## combine data
combined_df <- rbind(original %>% mutate(group = "original"), survctgan_augmented_factor %>% mutate(group = "synthetic")) %>% mutate(combined_haplotype = str_c(haplotype,"_", group ))
## fit the model
km_combined <- survfit(Surv(delai_event, event) ~ combined_haplotype, data = combined_df)
# plot
ggsurvplot(fit = km_combined,
data = combined_df,
size = 1, # change line size
conf.int = FALSE, # Add confidence interval
pval = TRUE, # Add p-value
risk.table = TRUE, # Add risk table
risk.table.col = "strata",# Risk table color by groups
risk.table.height = 0.35, # Useful to change when you have multiple groups
ggtheme = theme_bw() # Change ggplot2 theme
)
Graphical exploraiotn of distribution
library(GGally)
pm_ctgan_augmented <- combined_df %>% select(haplotype:delai_event, group) %>% ggpairs(
ggplot2::aes(colour = group,alpha = 0.5),
upper = list(continuous = wrap("cor", size = 1.5)),
lower=list(combo=wrap("facethist", binwidth=0.5))) +
theme(strip.text.x = element_text(size = 5),
strip.text.y = element_text(size = 5),axis.text = element_text(size = 5))
pm_ctgan_augmented
# ggsave("comparaison_distribution_survctgan_augmented.pdf")
# Définir le répertoire où se trouvent les fichiers
repertoire <- "~/Documents/avatar/tvae_ctgan_variability/Gen_data_synth_for_bootstrap/Generate_graft_loss3_multi/Graft_loss_surv_ctgan_large"
# Lire tous les fichiers CSV dans le répertoire
liste_donnees <- list.files(repertoire, pattern = "*.dat", full.names = TRUE) %>%
map(read_csv)
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1012 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): haplotype, cyp3A5D, age_r, sexe_r, age_d, sexe_d, rejet_aigu, TIF,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Appliquer le modèle de Cox à chaque jeu de données
resultats <- map(liste_donnees, ~ coxph(Surv(delai_event, event) ~ haplotype + cyp3A5D+ age_d , data = .x))
# Extraire les HR et les quantiles pour chaque variable
quantiles <- c(0, 5, 25, 50, 75, 95, 100)
hr_haplotype <- map(resultats, ~ tidy(.x, exponentiate = TRUE)) %>%
map_dfr(~ .x %>% filter(term == "haplotype") %>% select(estimate)) %>%
reframe(across(estimate, ~ quantile(., probs = quantiles/100))) %>%
mutate(quantiles = c(0, 5, 25, 50, 75, 95, 100), name = "haplotype")
hr_cyp3A5D <- map(resultats, ~ tidy(.x, exponentiate = TRUE)) %>%
map_dfr(~ .x %>% filter(term == "cyp3A5D") %>% select(estimate)) %>%
reframe(across(estimate, ~ quantile(., probs = quantiles/100))) %>%
mutate(quantiles = c(0, 5, 25, 50, 75, 95, 100), name = "cyp3A5D")
hr_age_d <- map(resultats, ~ tidy(.x, exponentiate = TRUE)) %>%
map_dfr(~ .x %>% filter(term == "age_d") %>% select(estimate)) %>%
reframe(across(estimate, ~ quantile(., probs = quantiles/100))) %>%
mutate(quantiles = c(0, 5, 25, 50, 75, 95, 100), name = "age_d")
# Afficher et combiner les résultats
hr_results_augmented_ctgan <- bind_rows(hr_haplotype, hr_cyp3A5D,hr_age_d)
# Print summary statistics
knitr::kable(hr_results_augmented_ctgan, "simple")
| estimate | quantiles | name |
|---|---|---|
| 1.328745e+00 | 0 | haplotype |
| 1.455721e+00 | 5 | haplotype |
| 2.102828e+00 | 25 | haplotype |
| 2.558735e+00 | 50 | haplotype |
| 3.258177e+00 | 75 | haplotype |
| 4.819013e+00 | 95 | haplotype |
| 5.614200e+00 | 100 | haplotype |
| 9.628560e-02 | 0 | cyp3A5D |
| 1.944780e-01 | 5 | cyp3A5D |
| 3.290273e-01 | 25 | cyp3A5D |
| 5.433777e-01 | 50 | cyp3A5D |
| 8.007847e-01 | 75 | cyp3A5D |
| 1.773390e+00 | 95 | cyp3A5D |
| 7.977806e+06 | 100 | cyp3A5D |
| 9.450976e-01 | 0 | age_d |
| 9.962434e-01 | 5 | age_d |
| 1.013176e+00 | 25 | age_d |
| 1.034718e+00 | 50 | age_d |
| 1.059836e+00 | 75 | age_d |
| 1.085749e+00 | 95 | age_d |
| 1.115233e+00 | 100 | age_d |
combined_data <- bind_rows(original1 %>% mutate(DataType = 'Original', haplotype = as.factor(haplotype), cyp3A5D = as.factor(cyp3A5D), sexe_r = as.factor(sexe_r), sexe_d = as.factor(sexe_d),rejet_aigu = as.factor(rejet_aigu),event = as.factor(event)),
avatars_tibble_knn5 %>% mutate(DataType = 'knn5', haplotype = as.factor(haplotype), cyp3A5D = as.factor(cyp3A5D), sexe_r = as.factor(sexe_r), sexe_d = as.factor(sexe_d),rejet_aigu = as.factor(rejet_aigu),event = as.factor(event)),
avatars_tibble_knn20 %>% mutate(DataType = 'knn20', haplotype = as.factor(haplotype), cyp3A5D = as.factor(cyp3A5D), sexe_r = as.factor(sexe_r), sexe_d = as.factor(sexe_d),rejet_aigu = as.factor(rejet_aigu),event = as.factor(event)),
avatars_tibble_knn10 %>% mutate(DataType = 'knn10', haplotype = as.factor(haplotype), cyp3A5D = as.factor(cyp3A5D), sexe_r = as.factor(sexe_r), sexe_d = as.factor(sexe_d),rejet_aigu = as.factor(rejet_aigu),event = as.factor(event)),
augmented_data_5 %>% mutate(DataType = 'augmented_knn5', haplotype = as.factor(haplotype), cyp3A5D = as.factor(cyp3A5D), sexe_r = as.factor(sexe_r), sexe_d = as.factor(sexe_d),rejet_aigu = as.factor(rejet_aigu),event = as.factor(event)),
augmented_data_20 %>% mutate(DataType = 'augmented_knn20', haplotype = as.factor(haplotype), cyp3A5D = as.factor(cyp3A5D), sexe_r = as.factor(sexe_r), sexe_d = as.factor(sexe_d),rejet_aigu = as.factor(rejet_aigu),event = as.factor(event)),
augmented_data_10 %>% mutate(DataType = 'augmented_knn10', haplotype = as.factor(haplotype), cyp3A5D = as.factor(cyp3A5D), sexe_r = as.factor(sexe_r), sexe_d = as.factor(sexe_d),rejet_aigu = as.factor(rejet_aigu),event = as.factor(event)),
survae_factor %>% mutate(DataType = 'survae', haplotype = as.factor(haplotype), cyp3A5D = as.factor(cyp3A5D), sexe_r = as.factor(sexe_r), sexe_d = as.factor(sexe_d),rejet_aigu = as.factor(rejet_aigu),event = as.factor(event)),
survae_augmented_factor %>% mutate(DataType = 'augmented_survae', haplotype = as.factor(haplotype), cyp3A5D = as.factor(cyp3A5D), sexe_r = as.factor(sexe_r), sexe_d = as.factor(sexe_d),rejet_aigu = as.factor(rejet_aigu),event = as.factor(event)),
survctgan_factor %>% mutate(DataType = 'ctgan', haplotype = as.factor(haplotype), cyp3A5D = as.factor(cyp3A5D), sexe_r = as.factor(sexe_r), sexe_d = as.factor(sexe_d),rejet_aigu = as.factor(rejet_aigu),event = as.factor(event)),
survctgan_augmented_factor %>% mutate(DataType = 'augmented_ctgan', haplotype = as.factor(haplotype), cyp3A5D = as.factor(cyp3A5D), sexe_r = as.factor(sexe_r), sexe_d = as.factor(sexe_d),rejet_aigu = as.factor(rejet_aigu),event = as.factor(event))
)
## Vector of categorical variables that need transformation
catVars <- c("haplotype", "cyp3A5D", "sexe_r", "sexe_d",
"rejet_aigu", "event")
## Create a variable list.
vars <- c( "haplotype", "cyp3A5D", "age_r", "sexe_r", "age_d", "sexe_d",
"rejet_aigu", "TIF", "event", "delai_event", "DataType")
tableOne <- CreateTableOne(vars = vars, strata = "DataType",factorVars = catVars, data = combined_data)
tableOne2<-print(tableOne, nonnormal = c( "age_r", "age_d", "TIF", "delai_event"), printToggle=F, minMax=T)
kableone(tableOne2)
| augmented_ctgan | augmented_knn10 | augmented_knn20 | augmented_knn5 | augmented_survae | ctgan | knn10 | knn20 | knn5 | Original | survae | p | test | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| n | 1012 | 1012 | 1012 | 1012 | 1012 | 253 | 253 | 253 | 253 | 253 | 253 | ||
| haplotype (%) | <0.001 | ||||||||||||
| 1 | 342 ( 33.8) | 356 ( 35.2) | 345 ( 34.1) | 373 ( 36.9) | 296 ( 29.2) | 64 ( 25.3) | 93 ( 36.8) | 79 ( 31.2) | 93 ( 36.8) | 97 ( 38.3) | 71 ( 28.1) | ||
| 2 | 485 ( 47.9) | 602 ( 59.5) | 614 ( 60.7) | 577 ( 57.0) | 560 ( 55.3) | 137 ( 54.2) | 146 ( 57.7) | 159 ( 62.8) | 144 ( 56.9) | 123 ( 48.6) | 146 ( 57.7) | ||
| 3 | 185 ( 18.3) | 54 ( 5.3) | 53 ( 5.2) | 62 ( 6.1) | 156 ( 15.4) | 52 ( 20.6) | 14 ( 5.5) | 15 ( 5.9) | 16 ( 6.3) | 33 ( 13.0) | 36 ( 14.2) | ||
| cyp3A5D = 2 (%) | 840 ( 83.0) | 880 ( 87.0) | 909 ( 89.8) | 859 ( 84.9) | 969 ( 95.8) | 231 ( 91.3) | 219 ( 86.6) | 224 ( 88.5) | 217 ( 85.8) | 211 ( 83.4) | 238 ( 94.1) | <0.001 | |
| age_r (median [range]) | 55.00 [19.00, 78.00] | 54.41 [23.53, 75.26] | 55.60 [24.90, 75.77] | 55.42 [23.08, 77.36] | 56.00 [24.00, 78.00] | 68.00 [36.00, 78.00] | 54.21 [24.23, 73.48] | 55.12 [25.46, 74.57] | 55.09 [24.19, 73.74] | 55.00 [19.00, 78.00] | 56.00 [20.00, 78.00] | <0.001 | nonnorm |
| sexe_r = 2 (%) | 650 ( 64.2) | 666 ( 65.8) | 700 ( 69.2) | 656 ( 64.8) | 647 ( 63.9) | 170 ( 67.2) | 165 ( 65.2) | 178 ( 70.4) | 163 ( 64.4) | 156 ( 61.7) | 163 ( 64.4) | 0.225 | |
| age_d (median [range]) | 30.00 [12.00, 58.00] | 39.92 [15.46, 63.56] | 39.73 [16.63, 67.75] | 39.96 [15.04, 68.49] | 34.00 [12.00, 66.00] | 38.00 [19.00, 71.00] | 39.60 [15.46, 62.80] | 39.04 [18.38, 67.75] | 39.82 [19.40, 68.49] | 40.00 [12.00, 73.00] | 34.00 [13.00, 60.00] | <0.001 | nonnorm |
| sexe_d = 2 (%) | 722 ( 71.3) | 743 ( 73.4) | 743 ( 73.4) | 717 ( 70.8) | 790 ( 78.1) | 191 ( 75.5) | 188 ( 74.3) | 188 ( 74.3) | 185 ( 73.1) | 174 ( 68.8) | 197 ( 77.9) | 0.011 | |
| rejet_aigu = 2 (%) | 331 ( 32.7) | 276 ( 27.3) | 244 ( 24.1) | 294 ( 29.1) | 289 ( 28.6) | 80 ( 31.6) | 72 ( 28.5) | 63 ( 24.9) | 73 ( 28.9) | 81 ( 32.0) | 74 ( 29.2) | 0.009 | |
| TIF (median [range]) | 993.00 [303.00, 2580.00] | 1141.45 [372.32, 2040.25] | 1144.15 [576.99, 2091.07] | 1158.71 [456.35, 2362.15] | 1058.00 [565.00, 2191.00] | 1051.00 [597.00, 2115.00] | 1156.78 [570.38, 1987.46] | 1135.02 [630.81, 2091.07] | 1174.55 [456.35, 2362.15] | 1153.00 [303.00, 2580.00] | 1057.00 [588.00, 1912.00] | <0.001 | nonnorm |
| event = 1 (%) | 137 ( 13.5) | 80 ( 7.9) | 55 ( 5.4) | 86 ( 8.5) | 53 ( 5.2) | 27 ( 10.7) | 20 ( 7.9) | 13 ( 5.1) | 21 ( 8.3) | 22 ( 8.7) | 14 ( 5.5) | <0.001 | |
| delai_event (median [range]) | 5.88 [1.02, 15.12] | 5.31 [0.87, 15.33] | 5.21 [1.02, 15.10] | 5.45 [0.97, 14.94] | 6.27 [0.82, 15.70] | 5.75 [0.92, 14.08] | 5.60 [0.96, 15.33] | 5.34 [1.20, 15.10] | 5.36 [0.97, 14.94] | 5.34 [0.68, 15.83] | 6.13 [1.07, 14.88] | <0.001 | nonnorm |
| DataType (%) | <0.001 | ||||||||||||
| augmented_ctgan | 1012 (100.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | ||
| augmented_knn10 | 0 ( 0.0) | 1012 (100.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | ||
| augmented_knn20 | 0 ( 0.0) | 0 ( 0.0) | 1012 (100.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | ||
| augmented_knn5 | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 1012 (100.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | ||
| augmented_survae | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 1012 (100.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | ||
| ctgan | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 253 (100.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | ||
| knn10 | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 253 (100.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | ||
| knn20 | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 253 (100.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | ||
| knn5 | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 253 (100.0) | 0 ( 0.0) | 0 ( 0.0) | ||
| Original | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 253 (100.0) | 0 ( 0.0) | ||
| survae | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 253 (100.0) |
library(patchwork)
pm_knn5
ggsave("Figure1.pdf")
## Saving 7 x 5 in image
# List of ggsurvplots
require("survminer")
splots <- list()
splots[[1]] <- km_original_plot
splots[[2]] <- km_synthetique_avatar_5
splots[[3]] <- km_synthetique_avatar_5_augmented
splots[[4]] <- km_synthetique_survae
splots[[5]] <- km_synthetique_survae_augmented
splots[[6]] <- km_synthetique_ctgan
splots[[7]] <- km_synthetique_ctgan_augmented
# Arrange multiple ggsurvplots and print the output
arrange_ggsurvplots(splots, print = TRUE,
ncol = 1, nrow = 7)#, risk.table.height = 0.4)
if (FALSE) {
# Arrange and save into pdf file
res <- arrange_ggsurvplots(splots, print = FALSE)
ggsave("Figure2.pdf", res)
}
#knn5
metric_avatar_knn5_graft_loss <- read_csv("Metrics_avatar2/metric_avatar_knn5_graft_loss.csv")
## Rows: 253 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): dcr, nndr
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metric_knn5 <- metric_avatar_knn5_graft_loss %>%
summarise(
across(
everything(),
list(
min = ~ quantile(., probs = 0),
p5 = ~ quantile(., probs = 0.05),
p25 = ~ quantile(., probs = 0.25),
p50 = ~ quantile(., probs = 0.5),
p75 = ~ quantile(., probs = 0.75),
p95 = ~ quantile(., probs = 0.95),
max = ~ quantile(., probs = 1)
)
)
)
knitr::kable(metric_knn5, "simple")
| dcr_min | dcr_p5 | dcr_p25 | dcr_p50 | dcr_p75 | dcr_p95 | dcr_max | nndr_min | nndr_p5 | nndr_p25 | nndr_p50 | nndr_p75 | nndr_p95 | nndr_max |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.159286 | 0.2838517 | 0.4241048 | 0.5665813 | 0.742086 | 1.008724 | 2.029339 | 0.0641988 | 0.1380793 | 0.2489758 | 0.3341827 | 0.5209876 | 0.7527488 | 0.924452 |
#knn5 augmented
metric_avatar_knn5_large_graft_loss <- read_csv("Metrics_avatar2/metric_avatar_knn5_large_graft_loss.csv")
## Rows: 1012 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): dcr, nndr
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metric_knn5_augmented <- metric_avatar_knn5_large_graft_loss %>% summarise(
across(
everything(),
list(
min = ~ quantile(., probs = 0),
p5 = ~ quantile(., probs = 0.05),
p25 = ~ quantile(., probs = 0.25),
p50 = ~ quantile(., probs = 0.5),
p75 = ~ quantile(., probs = 0.75),
p95 = ~ quantile(., probs = 0.95),
max = ~ quantile(., probs = 1)
)
)
)
knitr::kable(metric_knn5_augmented, "simple")
| dcr_min | dcr_p5 | dcr_p25 | dcr_p50 | dcr_p75 | dcr_p95 | dcr_max | nndr_min | nndr_p5 | nndr_p25 | nndr_p50 | nndr_p75 | nndr_p95 | nndr_max |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.0262085 | 0.0891968 | 0.199533 | 0.4231659 | 0.7846543 | 1.506673 | 2.486024 | 0.0191986 | 0.0503164 | 0.1179117 | 0.267012 | 0.5982074 | 0.9433168 | 0.9981841 |
#survae
metric_survae_graft_loss <- read_csv("Metrics_avatar2/metric_survae_graft_loss.csv")
## Rows: 253 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): dcr, nndr
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metric_survae <- metric_survae_graft_loss %>%
summarise(
across(
everything(),
list(
min = ~ quantile(., probs = 0),
p5 = ~ quantile(., probs = 0.05),
p25 = ~ quantile(., probs = 0.25),
p50 = ~ quantile(., probs = 0.5),
p75 = ~ quantile(., probs = 0.75),
p95 = ~ quantile(., probs = 0.95),
max = ~ quantile(., probs = 1)
)
)
)
knitr::kable(metric_survae, "simple")
| dcr_min | dcr_p5 | dcr_p25 | dcr_p50 | dcr_p75 | dcr_p95 | dcr_max | nndr_min | nndr_p5 | nndr_p25 | nndr_p50 | nndr_p75 | nndr_p95 | nndr_max |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.4054311 | 0.8699173 | 1.551997 | 1.906325 | 2.389682 | 2.995105 | 3.468345 | 0.2303528 | 0.5463846 | 0.7706567 | 0.8789318 | 0.9440272 | 0.9877799 | 0.9990694 |
#survae augmented
metric_survae_large_graft_loss <- read_csv("Metrics_avatar2/metric_survae_large_graft_loss.csv")
## Rows: 1012 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): dcr, nndr
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metric_survae_augmented <- metric_survae_large_graft_loss %>%
summarise(
across(
everything(),
list(
min = ~ quantile(., probs = 0),
p5 = ~ quantile(., probs = 0.05),
p25 = ~ quantile(., probs = 0.25),
p50 = ~ quantile(., probs = 0.5),
p75 = ~ quantile(., probs = 0.75),
p95 = ~ quantile(., probs = 0.95),
max = ~ quantile(., probs = 1)
)
)
)
knitr::kable(metric_survae_augmented, "simple")
| dcr_min | dcr_p5 | dcr_p25 | dcr_p50 | dcr_p75 | dcr_p95 | dcr_max | nndr_min | nndr_p5 | nndr_p25 | nndr_p50 | nndr_p75 | nndr_p95 | nndr_max |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.3015096 | 0.8650204 | 1.537663 | 1.938557 | 2.418375 | 3.083595 | 4.294902 | 0.1337203 | 0.544702 | 0.7858148 | 0.8997433 | 0.9570271 | 0.9939075 | 0.9999053 |
#ctgan
metric_survctgan_graft_loss <- read_csv("Metrics_avatar2/metric_survctgan_graft_loss.csv")
## Rows: 253 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): dcr, nndr
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metric_ctgan <- metric_survctgan_graft_loss %>%
summarise(
across(
everything(),
list(
min = ~ quantile(., probs = 0),
p5 = ~ quantile(., probs = 0.05),
p25 = ~ quantile(., probs = 0.25),
p50 = ~ quantile(., probs = 0.5),
p75 = ~ quantile(., probs = 0.75),
p95 = ~ quantile(., probs = 0.95),
max = ~ quantile(., probs = 1)
)
)
)
knitr::kable(metric_ctgan, "simple")
| dcr_min | dcr_p5 | dcr_p25 | dcr_p50 | dcr_p75 | dcr_p95 | dcr_max | nndr_min | nndr_p5 | nndr_p25 | nndr_p50 | nndr_p75 | nndr_p95 | nndr_max |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.511736 | 0.8324347 | 1.279238 | 1.781562 | 2.418493 | 3.395527 | 5.59313 | 0.2788541 | 0.5188259 | 0.7430643 | 0.8675757 | 0.9483103 | 0.9925245 | 0.999383 |
#ctgan augmented
metric_avatar_survctgan_large_graft_loss <- read_csv("Metrics_avatar2/metric_survctgan_large_graft_loss.csv")
## Rows: 1012 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): dcr, nndr
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metric_ctgan_augmented <- metric_avatar_survctgan_large_graft_loss %>%
summarise(
across(
everything(),
list(
min = ~ quantile(., probs = 0),
p5 = ~ quantile(., probs = 0.05),
p25 = ~ quantile(., probs = 0.25),
p50 = ~ quantile(., probs = 0.5),
p75 = ~ quantile(., probs = 0.75),
p95 = ~ quantile(., probs = 0.95),
max = ~ quantile(., probs = 1)
)
)
)
knitr::kable(metric_ctgan_augmented, "simple")
| dcr_min | dcr_p5 | dcr_p25 | dcr_p50 | dcr_p75 | dcr_p95 | dcr_max | nndr_min | nndr_p5 | nndr_p25 | nndr_p50 | nndr_p75 | nndr_p95 | nndr_max |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.2531389 | 0.8154637 | 1.335732 | 1.868256 | 2.592339 | 3.62116 | 5.117563 | 0.2075465 | 0.5173092 | 0.753877 | 0.8699735 | 0.9498682 | 0.9916846 | 0.9998274 |
plot of the metrics distribution
metrics_plot <- metric_avatar_knn5_graft_loss %>% mutate(type = "knn5") %>%
bind_rows(metric_avatar_knn5_large_graft_loss %>% mutate(type = "knn5_augmented")) %>%
bind_rows(metric_survae_graft_loss %>% mutate(type = "survae")) %>%
bind_rows(metric_survae_large_graft_loss %>% mutate(type = "survae_augmented")) %>%
bind_rows(metric_survctgan_graft_loss %>% mutate(type = "survctgan")) %>%
bind_rows(metric_avatar_survctgan_large_graft_loss %>% mutate(type = "survctgan_augmented"))
# dcr
ggplot(metrics_plot, aes(x = dcr, fill = type, color = type, alpha = 0.5)) +
geom_density(adjust = 1.5) +
scale_alpha_identity() +
labs(title = "DCR density Distribution by Group",
x = "DCR",
y = "Density",
fill = "Group",
color = "Group") +
theme_minimal() +
theme(legend.position = "right")
# nndr
ggplot(metrics_plot, aes(x = nndr, fill = type, color = type, alpha = 0.5)) +
geom_density(adjust = 1.5) +
scale_alpha_identity() +
labs(title = "NNDR density distribution per group",
x = "NNDR",
y = "Density",
fill = "Group",
color = "Group") +
theme_minimal() +
theme(legend.position = "right")